From d54c799db4a9b1362afb57b3b4f2ddb8cab97901 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Thu, 26 Mar 2026 17:11:19 -0400 Subject: [PATCH 01/21] implement trace stats for serverless compat --- Cargo.lock | 92 ++++++++- LICENSE-3rdparty.csv | 2 + crates/datadog-serverless-compat/src/main.rs | 40 +++- crates/datadog-trace-agent/Cargo.toml | 8 +- crates/datadog-trace-agent/src/config.rs | 6 + crates/datadog-trace-agent/src/lib.rs | 2 + crates/datadog-trace-agent/src/mini_agent.rs | 9 +- .../src/stats_concentrator_service.rs | 177 +++++++++++++++++ .../datadog-trace-agent/src/stats_flusher.rs | 162 +++++++++++----- .../src/stats_generator.rs | 64 +++++++ .../src/trace_processor.rs | 25 ++- .../tests/common/helpers.rs | 5 + .../tests/common/mock_server.rs | 16 +- .../datadog-trace-agent/tests/common/mocks.rs | 9 +- .../tests/integration_test.rs | 178 +++++++++++++++--- 15 files changed, 696 insertions(+), 99 deletions(-) create mode 100644 crates/datadog-trace-agent/src/stats_concentrator_service.rs create mode 100644 crates/datadog-trace-agent/src/stats_generator.rs diff --git a/Cargo.lock b/Cargo.lock index 9a49068c..46feb1f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -568,8 +568,10 @@ dependencies = [ "libdd-capabilities", "libdd-capabilities-impl", "libdd-common 3.0.2", + "libdd-library-config", "libdd-trace-obfuscation", "libdd-trace-protobuf 3.0.1", + "libdd-trace-stats 2.0.0", "libdd-trace-utils 3.0.1", "reqwest", "rmp-serde", @@ -578,6 +580,7 @@ dependencies = [ "serial_test", "temp-env", "tempfile", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -698,7 +701,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -1529,12 +1532,12 @@ dependencies = [ "http", "http-body-util", "libdd-common 2.0.1", - "libdd-ddsketch", + "libdd-ddsketch 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "libdd-dogstatsd-client", "libdd-telemetry", "libdd-tinybytes 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "libdd-trace-protobuf 2.0.0", - "libdd-trace-stats", + "libdd-trace-stats 1.0.3", "libdd-trace-utils 2.0.2", "rmp-serde", "serde", @@ -1555,6 +1558,14 @@ dependencies = [ "prost 0.14.3", ] +[[package]] +name = "libdd-ddsketch" +version = "1.0.1" +source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +dependencies = [ + "prost 0.14.3", +] + [[package]] name = "libdd-dogstatsd-client" version = "1.0.1" @@ -1569,6 +1580,37 @@ dependencies = [ "tracing", ] +[[package]] +name = "libdd-library-config" +version = "1.1.0" +source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +dependencies = [ + "anyhow", + "libc", + "libdd-trace-protobuf 3.0.1", + "memfd", + "prost 0.14.3", + "rand 0.8.6", + "rmp", + "rmp-serde", + "serde", + "serde_yaml", +] + +[[package]] +name = "libdd-shared-runtime" +version = "0.1.0" +source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +dependencies = [ + "async-trait", + "futures", + "libdd-capabilities", + "libdd-common 3.0.2", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "libdd-telemetry" version = "3.0.0" @@ -1583,7 +1625,7 @@ dependencies = [ "http-body-util", "libc", "libdd-common 2.0.1", - "libdd-ddsketch", + "libdd-ddsketch 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "serde", "serde_json", "sys-info", @@ -1675,11 +1717,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea447dc8a5d84c6b5eb6ea877c4fea4149fd29f6b45fcfc5cfd7edf82a18e056" dependencies = [ "hashbrown 0.15.5", - "libdd-ddsketch", + "libdd-ddsketch 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "libdd-trace-protobuf 2.0.0", "libdd-trace-utils 2.0.2", ] +[[package]] +name = "libdd-trace-stats" +version = "2.0.0" +source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +dependencies = [ + "anyhow", + "async-trait", + "hashbrown 0.15.5", + "http", + "libdd-capabilities", + "libdd-capabilities-impl", + "libdd-common 3.0.2", + "libdd-ddsketch 1.0.1 (git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a)", + "libdd-shared-runtime", + "libdd-trace-protobuf 3.0.1", + "libdd-trace-utils 3.0.1", + "rmp-serde", + "serde", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "libdd-trace-utils" version = "2.0.2" @@ -1819,6 +1884,15 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memfd" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad38eb12aea514a0466ea40a80fd8cc83637065948eb4a426e4aa46261175227" +dependencies = [ + "rustix 1.1.4", +] + [[package]] name = "mime" version = "0.3.17" @@ -1910,7 +1984,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2634,7 +2708,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2971,7 +3045,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -3067,7 +3141,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix 1.1.4", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index c89c5e2f..74fa67bc 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -123,6 +123,7 @@ libdd-common,https://github.com/DataDog/libdatadog/tree/main/datadog-common,Apac libdd-data-pipeline,https://github.com/DataDog/libdatadog/tree/main/libdd-data-pipeline,Apache-2.0,The libdd-data-pipeline Authors libdd-ddsketch,https://github.com/DataDog/libdatadog/tree/main/libdd-ddsketch,Apache-2.0,The libdd-ddsketch Authors libdd-dogstatsd-client,https://github.com/DataDog/libdatadog/tree/main/libdd-dogstatsd-client,Apache-2.0,The libdd-dogstatsd-client Authors +libdd-library-config,https://github.com/DataDog/libdatadog/tree/main/libdd-library-config,Apache-2.0,The libdd-library-config Authors libdd-telemetry,https://github.com/DataDog/libdatadog/tree/main/libdd-telemetry,Apache-2.0,The libdd-telemetry Authors libdd-tinybytes,https://github.com/DataDog/libdatadog/tree/main/libdd-tinybytes,Apache-2.0,The libdd-tinybytes Authors libdd-trace-normalization,https://github.com/DataDog/libdatadog/tree/main/libdd-trace-normalization,Apache-2.0,David Lee @@ -139,6 +140,7 @@ lru,https://github.com/jeromefroe/lru-rs,MIT,Jerome Froelich matchers,https://github.com/hawkw/matchers,MIT,Eliza Weisman memchr,https://github.com/BurntSushi/memchr,Unlicense OR MIT,"Andrew Gallant , bluss" +memfd,https://github.com/lucab/memfd-rs,MIT OR Apache-2.0,"Luca Bruno , Simonas Kazlauskas " mime,https://github.com/hyperium/mime,MIT OR Apache-2.0,Sean McArthur minimal-lexical,https://github.com/Alexhuszagh/minimal-lexical,MIT OR Apache-2.0,Alex Huszagh miniz_oxide,https://github.com/Frommi/miniz_oxide/tree/master/miniz_oxide,MIT OR Zlib OR Apache-2.0,"Frommi , oyvindln , Rich Geldreich richgel99@gmail.com" diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 8c20c41a..0aae8072 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -18,7 +18,8 @@ use zstd::zstd_safe::CompressionLevel; use datadog_trace_agent::{ aggregator::TraceAggregator, - config, env_verifier, mini_agent, proxy_flusher, stats_flusher, stats_processor, + config, env_verifier, mini_agent, proxy_flusher, stats_concentrator_service, stats_flusher, + stats_generator, stats_processor, trace_flusher::{self, TraceFlusher}, trace_processor, }; @@ -119,6 +120,12 @@ pub async fn main() { .ok() .and_then(|v| v.parse::().ok()) .unwrap_or(DEFAULT_LOG_INTAKE_PORT); + + let dd_serverless_stats_computation_enabled = + env::var("DD_SERVERLESS_STATS_COMPUTATION_ENABLED") + .map(|val| val.to_lowercase() != "false") + .unwrap_or(true); + debug!("Starting serverless trace mini agent"); let env_filter = format!("h2=off,hyper=off,rustls=off,{}", log_level); @@ -144,11 +151,6 @@ pub async fn main() { let env_verifier = Arc::new(env_verifier::ServerlessEnvVerifier::default()); - let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor {}); - - let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher {}); - let stats_processor = Arc::new(stats_processor::ServerlessStatsProcessor {}); - let config = match config::Config::new() { Ok(c) => Arc::new(c), Err(e) => { @@ -157,6 +159,29 @@ pub async fn main() { } }; + let (stats_concentrator_handle, stats_generator) = if dd_serverless_stats_computation_enabled { + info!("serverless stats computation enabled"); + let (service, handle) = + stats_concentrator_service::StatsConcentratorService::new(config.clone()); + tokio::spawn(service.run()); + ( + Some(handle.clone()), + Some(Arc::new(stats_generator::StatsGenerator::new(handle))), + ) + } else { + info!("serverless stats computation disabled"); + (None, None) + }; + + let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { + stats_generator: stats_generator.clone(), + }); + + let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher { + stats_concentrator: stats_concentrator_handle.clone(), + }); + let stats_processor = Arc::new(stats_processor::ServerlessStatsProcessor {}); + let trace_aggregator = Arc::new(TokioMutex::new(TraceAggregator::default())); let trace_flusher = Arc::new(trace_flusher::ServerlessTraceFlusher::new( trace_aggregator, @@ -175,8 +200,9 @@ pub async fn main() { proxy_flusher, }); + let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); tokio::spawn(async move { - let res = mini_agent.start_mini_agent().await; + let res = mini_agent.start_mini_agent(shutdown_rx).await; if let Err(e) = res { error!("Error when starting serverless trace mini agent: {e:?}"); } diff --git a/crates/datadog-trace-agent/Cargo.toml b/crates/datadog-trace-agent/Cargo.toml index 1c7ad7f3..3c9e42ed 100644 --- a/crates/datadog-trace-agent/Cargo.toml +++ b/crates/datadog-trace-agent/Cargo.toml @@ -24,16 +24,22 @@ async-trait = "0.1.64" tracing = { version = "0.1", default-features = false } serde = { version = "1.0.145", features = ["derive"] } serde_json = "1.0" +thiserror = { version = "1.0.58", default-features = false } libdd-capabilities = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } libdd-capabilities-impl = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } +libdd-library-config = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } libdd-trace-protobuf = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } +libdd-trace-stats = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a", features = [ "mini_agent", ] } libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } datadog-fips = { path = "../datadog-fips" } -reqwest = { version = "0.12.23", features = ["json", "http2"], default-features = false } +reqwest = { version = "0.12.23", features = [ + "json", + "http2", +], default-features = false } bytes = "1.10.1" [dev-dependencies] diff --git a/crates/datadog-trace-agent/src/config.rs b/crates/datadog-trace-agent/src/config.rs index ddbd8207..b1d38893 100644 --- a/crates/datadog-trace-agent/src/config.rs +++ b/crates/datadog-trace-agent/src/config.rs @@ -109,6 +109,8 @@ pub struct Config { /// timeout for environment verification, in milliseconds pub verify_env_timeout_ms: u64, pub proxy_url: Option, + pub service: Option, + pub env: Option, } impl Config { @@ -251,6 +253,8 @@ impl Config { .or_else(|_| env::var("HTTPS_PROXY")) .ok(), tags, + service: env::var("DD_SERVICE").ok(), + env: env::var("DD_ENV").ok(), }) } } @@ -725,6 +729,8 @@ pub mod test_helpers { proxy_request_retry_backoff_base_ms: 100, verify_env_timeout_ms: 1000, proxy_url: None, + service: None, + env: None, } } } diff --git a/crates/datadog-trace-agent/src/lib.rs b/crates/datadog-trace-agent/src/lib.rs index a87bf56b..daeed742 100644 --- a/crates/datadog-trace-agent/src/lib.rs +++ b/crates/datadog-trace-agent/src/lib.rs @@ -13,7 +13,9 @@ pub mod env_verifier; pub mod http_utils; pub mod mini_agent; pub mod proxy_flusher; +pub mod stats_concentrator_service; pub mod stats_flusher; +pub mod stats_generator; pub mod stats_processor; pub mod trace_flusher; pub mod trace_processor; diff --git a/crates/datadog-trace-agent/src/mini_agent.rs b/crates/datadog-trace-agent/src/mini_agent.rs index ae074810..8ed2ce67 100644 --- a/crates/datadog-trace-agent/src/mini_agent.rs +++ b/crates/datadog-trace-agent/src/mini_agent.rs @@ -50,7 +50,10 @@ pub struct MiniAgent { } impl MiniAgent { - pub async fn start_mini_agent(&self) -> Result<(), Box> { + pub async fn start_mini_agent( + &self, + shutdown_rx: tokio::sync::oneshot::Receiver<()>, + ) -> Result<(), Box> { let now = Instant::now(); // verify we are in a serverless function environment. if not, shut down the mini agent. @@ -93,7 +96,7 @@ impl MiniAgent { let stats_config = self.config.clone(); let stats_flusher_handle = tokio::spawn(async move { stats_flusher - .start_stats_flusher(stats_config, stats_rx) + .start_stats_flusher(stats_config, stats_rx, shutdown_rx) .await; }); @@ -521,7 +524,7 @@ impl MiniAgent { INFO_ENDPOINT_PATH, PROFILING_ENDPOINT_PATH ], - "client_drop_p0s": true, + "client_drop_p0s": false, "config": config_json } ); diff --git a/crates/datadog-trace-agent/src/stats_concentrator_service.rs b/crates/datadog-trace-agent/src/stats_concentrator_service.rs new file mode 100644 index 00000000..669e13f7 --- /dev/null +++ b/crates/datadog-trace-agent/src/stats_concentrator_service.rs @@ -0,0 +1,177 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; +use tokio::sync::{mpsc, oneshot}; + +use crate::config::Config; +use libdd_library_config::tracer_metadata::TracerMetadata; +use libdd_trace_protobuf::pb::{ClientStatsPayload, TraceChunk}; +use libdd_trace_stats::span_concentrator::SpanConcentrator; +use std::time::{Duration, SystemTime}; +use tracing::{debug, error}; + +const S_TO_NS: u64 = 1_000_000_000; +const BUCKET_DURATION_NS: u64 = 10 * S_TO_NS; // 10 seconds + +#[derive(Debug, thiserror::Error)] +pub enum StatsError { + #[error("Failed to send command to concentrator: {0}")] + SendError(Box>), + #[error("Failed to receive response from concentrator: {0}")] + RecvError(oneshot::error::RecvError), +} + +pub enum ConcentratorCommand { + AddChunk(Box, Arc), + Flush(bool, oneshot::Sender>), +} + +/// A cloneable handle to the stats concentrator service, safe to share across async tasks. +#[derive(Clone)] +pub struct StatsConcentratorHandle { + tx: mpsc::UnboundedSender, + channel_depth: Arc, +} + +impl StatsConcentratorHandle { + #[must_use] + pub fn new( + tx: mpsc::UnboundedSender, + channel_depth: Arc, + ) -> Self { + Self { tx, channel_depth } + } + + /// Adds a trace chunk for stats computation. + pub fn add_chunk( + &self, + chunk: TraceChunk, + metadata: Arc, + ) -> Result<(), StatsError> { + self.channel_depth.fetch_add(1, Ordering::Relaxed); + self.tx + .send(ConcentratorCommand::AddChunk(Box::new(chunk), metadata)) + .map_err(|e| { + self.channel_depth.fetch_sub(1, Ordering::Relaxed); + StatsError::SendError(Box::new(e)) + }) + } + + pub async fn flush(&self, force_flush: bool) -> Result, StatsError> { + let (response_tx, response_rx) = oneshot::channel(); + self.channel_depth.fetch_add(1, Ordering::Relaxed); + self.tx + .send(ConcentratorCommand::Flush(force_flush, response_tx)) + .map_err(|e| { + self.channel_depth.fetch_sub(1, Ordering::Relaxed); + StatsError::SendError(Box::new(e)) + })?; + response_rx.await.map_err(StatsError::RecvError) + } +} + +pub struct StatsConcentratorService { + concentrator: SpanConcentrator, + rx: mpsc::UnboundedReceiver, + tracer_metadata: Option>, + config: Arc, + channel_depth: Arc, +} + +impl StatsConcentratorService { + #[must_use] + pub fn new(config: Arc) -> (Self, StatsConcentratorHandle) { + let (tx, rx) = mpsc::unbounded_channel(); + let channel_depth = Arc::new(AtomicUsize::new(0)); + let handle = StatsConcentratorHandle::new(tx, Arc::clone(&channel_depth)); + // TODO: set span_kinds_stats_computed and peer_tag_keys + let concentrator = SpanConcentrator::new( + Duration::from_nanos(BUCKET_DURATION_NS), + SystemTime::now(), + vec![], + vec![], + ); + let service = Self { + concentrator, + rx, + tracer_metadata: None, + config, + channel_depth, + }; + (service, handle) + } + + pub async fn run(mut self) { + while let Some(command) = self.rx.recv().await { + match command { + ConcentratorCommand::AddChunk(chunk, metadata) => { + self.channel_depth.fetch_sub(1, Ordering::Relaxed); + if self.tracer_metadata.is_none() { + self.tracer_metadata = Some(metadata); + } + for span in &chunk.spans { + self.concentrator.add_span(span); + } + } + ConcentratorCommand::Flush(force_flush, response_tx) => { + let depth = self.channel_depth.fetch_sub(1, Ordering::Relaxed) - 1; + debug!(channel_depth = depth, "Stats concentrator channel depth"); + self.handle_flush(force_flush, response_tx); + } + } + } + } + + fn handle_flush( + &mut self, + force_flush: bool, + response_tx: oneshot::Sender>, + ) { + let stats_buckets = self.concentrator.flush(SystemTime::now(), force_flush); + let stats = if stats_buckets.is_empty() { + None + } else { + let default_metadata = TracerMetadata::default(); + let metadata = self.tracer_metadata.as_deref().unwrap_or(&default_metadata); + Some(ClientStatsPayload { + // Do not set hostname so the trace stats backend can aggregate stats properly + hostname: String::new(), + // Prefer env from the tracer payload, fall back to agent config + env: metadata + .service_env + .clone() + .filter(|s| !s.is_empty()) + .or_else(|| self.config.env.clone()) + .unwrap_or_default(), + version: metadata.service_version.clone().unwrap_or_default(), + lang: metadata.tracer_language.clone(), + tracer_version: metadata.tracer_version.clone(), + // Not set for agent-computed stats; runtime_id identifies tracer-computed payloads + runtime_id: String::new(), + // Not supported yet + sequence: 0, + // Not supported yet + agent_aggregation: String::new(), + // One service per app for serverless + service: self.config.service.clone().unwrap_or_default(), + container_id: metadata.container_id.clone().unwrap_or_default(), + // Not supported yet + tags: vec![], + // Not supported yet + git_commit_sha: String::new(), + // Not supported yet + image_tag: String::new(), + stats: stats_buckets, + // Not supported yet + process_tags: String::new(), + // Not supported yet + process_tags_hash: 0, + }) + }; + if let Err(e) = response_tx.send(stats) { + error!("Failed to return trace stats: {e:?}"); + } + } +} diff --git a/crates/datadog-trace-agent/src/stats_flusher.rs b/crates/datadog-trace-agent/src/stats_flusher.rs index 198593cb..2fb0597a 100644 --- a/crates/datadog-trace-agent/src/stats_flusher.rs +++ b/crates/datadog-trace-agent/src/stats_flusher.rs @@ -4,29 +4,73 @@ use async_trait::async_trait; use libdd_capabilities_impl::DefaultHttpClient; use std::{sync::Arc, time}; -use tokio::sync::{Mutex, mpsc::Receiver}; +use tokio::sync::mpsc::Receiver; +use tokio::sync::oneshot; use tracing::{debug, error}; use libdd_trace_protobuf::pb; use libdd_trace_utils::stats_utils; use crate::config::Config; +use crate::stats_concentrator_service::StatsConcentratorHandle; + +/// Whether the stats flusher should run `flush_stats` +fn should_flush_stats_buffer( + channel_has_tracer_stats: bool, + serverless_stats_enabled: bool, +) -> bool { + channel_has_tracer_stats || serverless_stats_enabled +} + +/// Serializes and sends a single `StatsPayload` to the intake. +async fn send_stats_payload(config: &Arc, payload: pb::StatsPayload) { + debug!("Stats payload to be sent: {payload:?}"); + let serialized = match stats_utils::serialize_stats_payload(payload) { + Ok(res) => res, + Err(err) => { + error!("Failed to serialize stats payload, dropping stats: {err}"); + return; + } + }; + #[allow(clippy::unwrap_used)] + match stats_utils::send_stats_payload::( + serialized, + &config.trace_stats_intake, + config.trace_stats_intake.api_key.as_ref().unwrap(), + ) + .await + { + Ok(_) => debug!("Successfully flushed stats"), + Err(e) => error!("Error sending stats: {e:?}"), + } +} #[async_trait] pub trait StatsFlusher { /// Starts a stats flusher that listens for stats payloads sent to the tokio mpsc Receiver, - /// implementing flushing logic that calls flush_stats. + /// implementing flushing logic that calls flush_stats. Runs until the shutdown signal fires, + /// at which point it performs a final force flush and returns. async fn start_stats_flusher( &self, config: Arc, - mut rx: Receiver, + rx: Receiver, + shutdown_rx: oneshot::Receiver<()>, ); /// Flushes stats to the Datadog trace stats intake. - async fn flush_stats(&self, config: Arc, traces: Vec); + /// `force_flush` controls whether in-progress concentrator buckets are flushed (true on + /// shutdown, false on normal interval flushes). + async fn flush_stats( + &self, + config: Arc, + client_stats: Vec, + force_flush: bool, + ); } #[derive(Clone)] -pub struct ServerlessStatsFlusher {} +pub struct ServerlessStatsFlusher { + pub stats_concentrator: Option, +} #[async_trait] impl StatsFlusher for ServerlessStatsFlusher { @@ -34,60 +78,82 @@ impl StatsFlusher for ServerlessStatsFlusher { &self, config: Arc, mut rx: Receiver, + mut shutdown_rx: oneshot::Receiver<()>, ) { - let buffer: Arc>> = Arc::new(Mutex::new(Vec::new())); - - let buffer_producer = buffer.clone(); - let buffer_consumer = buffer.clone(); - - tokio::spawn(async move { - while let Some(stats_payload) = rx.recv().await { - let mut buffer = buffer_producer.lock().await; - buffer.push(stats_payload); - } - }); + let mut interval = + tokio::time::interval(time::Duration::from_secs(config.stats_flush_interval_secs)); + let mut buffer: Vec = Vec::new(); loop { - tokio::time::sleep(time::Duration::from_secs(config.stats_flush_interval_secs)).await; + tokio::select! { + // Receive client stats and add them to the buffer + Some(stats) = rx.recv() => { + buffer.push(stats); + } + + // Drain client stats in buffer and stats from concentrator on interval + _ = interval.tick() => { + let client_stats = std::mem::take(&mut buffer); + let should_flush = should_flush_stats_buffer( + !client_stats.is_empty(), + self.stats_concentrator.is_some(), + ); + if should_flush { + self.flush_stats(config.clone(), client_stats, false).await; + } + } - let mut buffer = buffer_consumer.lock().await; - if !buffer.is_empty() { - self.flush_stats(config.clone(), buffer.to_vec()).await; - buffer.clear(); + _ = &mut shutdown_rx => { + // Drain any client stats that arrived before the shutdown signal + while let Ok(stats) = rx.try_recv() { + buffer.push(stats); + } + // Force flush all in progress concentrator stats buckets on shutdown signal + self.flush_stats(config.clone(), std::mem::take(&mut buffer), true).await; + return; + } } } } - async fn flush_stats(&self, config: Arc, stats: Vec) { - if stats.is_empty() { - return; + /// Flushes client computed stats from the tracer and serverless computed stats as separate payloads + async fn flush_stats( + &self, + config: Arc, + client_stats: Vec, + force_flush: bool, + ) { + // Flush client computed stats from the tracer + if !client_stats.is_empty() { + let payload = stats_utils::construct_stats_payload(client_stats); + send_stats_payload(&config, payload).await; } - debug!("Flushing {} stats", stats.len()); - - let stats_payload = stats_utils::construct_stats_payload(stats); - debug!("Stats payload to be sent: {stats_payload:?}"); - - let serialized_stats_payload = match stats_utils::serialize_stats_payload(stats_payload) { - Ok(res) => res, - Err(err) => { - error!("Failed to serialize stats payload, dropping stats: {err}"); - return; - } - }; - - #[allow(clippy::unwrap_used)] - match stats_utils::send_stats_payload::( - serialized_stats_payload, - &config.trace_stats_intake, - config.trace_stats_intake.api_key.as_ref().unwrap(), - ) - .await - { - Ok(_) => debug!("Successfully flushed stats"), - Err(e) => { - error!("Error sending stats: {e:?}") + // Flush concentrator stats + if let Some(ref concentrator) = self.stats_concentrator { + match concentrator.flush(force_flush).await { + Ok(Some(agent_stats)) => { + let mut payload = stats_utils::construct_stats_payload(vec![agent_stats]); + payload.client_computed = false; + send_stats_payload(&config, payload).await; + } + Ok(None) => {} + Err(e) => error!("Failed to flush concentrator stats: {e}"), } } } } + +#[cfg(test)] +mod tests { + use super::should_flush_stats_buffer; + + #[test] + fn should_flush_stats_buffer_all_cases() { + // (stats channel empty, serverless computed stats enabled with concentrator) + assert!(!should_flush_stats_buffer(false, false)); + assert!(should_flush_stats_buffer(true, false)); + assert!(should_flush_stats_buffer(false, true)); + assert!(should_flush_stats_buffer(true, true)); + } +} diff --git a/crates/datadog-trace-agent/src/stats_generator.rs b/crates/datadog-trace-agent/src/stats_generator.rs new file mode 100644 index 00000000..7e06a387 --- /dev/null +++ b/crates/datadog-trace-agent/src/stats_generator.rs @@ -0,0 +1,64 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use crate::stats_concentrator_service::{StatsConcentratorHandle, StatsError}; +use libdd_library_config::tracer_metadata::TracerMetadata; +use libdd_trace_utils::tracer_payload::TracerPayloadCollection; +use tracing::error; + +pub struct StatsGenerator { + stats_concentrator: StatsConcentratorHandle, +} + +#[derive(Debug, thiserror::Error)] +pub enum StatsGeneratorError { + #[error("Failed to send command to stats concentrator: {0}")] + ConcentratorCommandError(StatsError), + #[error("Unsupported tracer payload version. Failed to send trace stats.")] + TracerPayloadVersionError, +} + +// Sends tracer payloads to the stats concentrator +impl StatsGenerator { + #[must_use] + pub fn new(stats_concentrator: StatsConcentratorHandle) -> Self { + Self { stats_concentrator } + } + + pub fn send( + &self, + tracer_payload_collection: &TracerPayloadCollection, + ) -> Result<(), StatsGeneratorError> { + if let TracerPayloadCollection::V07(tracer_payloads) = tracer_payload_collection { + for tracer_payload in tracer_payloads { + let metadata = Arc::new(TracerMetadata { + schema_version: 2, + runtime_id: None, + tracer_language: tracer_payload.language_name.clone(), + tracer_version: tracer_payload.tracer_version.clone(), + hostname: String::new(), + service_name: None, + service_env: Some(tracer_payload.env.clone()), + service_version: Some(tracer_payload.app_version.clone()), + process_tags: None, + container_id: Some(tracer_payload.container_id.clone()), + }); + for chunk in &tracer_payload.chunks { + if let Err(err) = self + .stats_concentrator + .add_chunk(chunk.clone(), Arc::clone(&metadata)) + { + error!("Failed to send trace chunk to concentrator: {err}"); + return Err(StatsGeneratorError::ConcentratorCommandError(err)); + } + } + } + Ok(()) + } else { + error!("Unsupported tracer payload version. Failed to send trace stats."); + Err(StatsGeneratorError::TracerPayloadVersionError) + } + } +} diff --git a/crates/datadog-trace-agent/src/trace_processor.rs b/crates/datadog-trace-agent/src/trace_processor.rs index 96f82098..bd385e01 100644 --- a/crates/datadog-trace-agent/src/trace_processor.rs +++ b/crates/datadog-trace-agent/src/trace_processor.rs @@ -7,7 +7,7 @@ use async_trait::async_trait; use hyper::{StatusCode, http}; use libdd_common::http_common; use tokio::sync::mpsc::Sender; -use tracing::debug; +use tracing::{debug, error}; use libdd_trace_obfuscation::obfuscate::obfuscate_span; use libdd_trace_protobuf::pb; @@ -18,6 +18,7 @@ use libdd_trace_utils::tracer_payload::{TraceChunkProcessor, TracerPayloadCollec use crate::{ config::Config, http_utils::{self, log_and_create_http_response, log_and_create_traces_success_http_response}, + stats_generator::StatsGenerator, }; const TRACER_PAYLOAD_FUNCTION_TAGS_TAG_KEY: &str = "_dd.tags.function"; @@ -65,7 +66,10 @@ impl TraceChunkProcessor for ChunkProcessor { } } #[derive(Clone)] -pub struct ServerlessTraceProcessor {} +pub struct ServerlessTraceProcessor { + /// The stats generator to use for generating stats and sending them to the stats concentrator. + pub stats_generator: Option>, +} #[async_trait] impl TraceProcessor for ServerlessTraceProcessor { @@ -139,6 +143,13 @@ impl TraceProcessor for ServerlessTraceProcessor { } } + if let Some(stats_generator) = self.stats_generator.as_ref() + && !tracer_header_tags.client_computed_stats + && let Err(e) = stats_generator.send(&payload) + { + error!("Stats generator error: {e}"); + } + let send_data = SendData::new(body_size, payload, tracer_header_tags, &config.trace_intake); // send trace payload to our trace flusher @@ -219,6 +230,8 @@ mod tests { ..Default::default() }, tags: Tags::from_env_string("env:test,service:my-service"), + service: Some("test-service".to_string()), + env: Some("test-env".to_string()), } } @@ -254,7 +267,9 @@ mod tests { .body(http_common::Body::from(bytes)) .unwrap(); - let trace_processor = trace_processor::ServerlessTraceProcessor {}; + let trace_processor = trace_processor::ServerlessTraceProcessor { + stats_generator: None, + }; let res = trace_processor .process_traces( Arc::new(create_test_config()), @@ -326,7 +341,9 @@ mod tests { .body(http_common::Body::from(bytes)) .unwrap(); - let trace_processor = trace_processor::ServerlessTraceProcessor {}; + let trace_processor = trace_processor::ServerlessTraceProcessor { + stats_generator: None, + }; let res = trace_processor .process_traces( Arc::new(create_test_config()), diff --git a/crates/datadog-trace-agent/tests/common/helpers.rs b/crates/datadog-trace-agent/tests/common/helpers.rs index 6dd8d825..9808f474 100644 --- a/crates/datadog-trace-agent/tests/common/helpers.rs +++ b/crates/datadog-trace-agent/tests/common/helpers.rs @@ -23,6 +23,7 @@ pub async fn send_tcp_request( uri: &str, method: &str, body: Option>, + additional_headers: &[(&str, &str)], ) -> Result, Box> { let stream = timeout( Duration::from_secs(2), @@ -42,6 +43,10 @@ pub async fn send_tcp_request( .method(method) .header("Content-Type", "application/msgpack"); + for (name, value) in additional_headers { + request_builder = request_builder.header(*name, *value); + } + let response = if let Some(body_data) = body { let body_len = body_data.len(); request_builder = request_builder.header("Content-Length", body_len.to_string()); diff --git a/crates/datadog-trace-agent/tests/common/mock_server.rs b/crates/datadog-trace-agent/tests/common/mock_server.rs index f1beb1ac..cd0cd6b9 100644 --- a/crates/datadog-trace-agent/tests/common/mock_server.rs +++ b/crates/datadog-trace-agent/tests/common/mock_server.rs @@ -4,7 +4,7 @@ //! Simple mock HTTP server for testing flushers use http_body_util::BodyExt; -use hyper::{Request, Response, body::Incoming}; +use hyper::{Request, Response, StatusCode, body::Incoming}; use hyper_util::rt::TokioIo; use libdd_common::http_common; use std::net::SocketAddr; @@ -60,6 +60,7 @@ impl MockServer { // Capture the request let method = req.method().to_string(); let path = req.uri().path().to_string(); + let is_stats_intake = path.ends_with("/stats"); let headers: Vec<(String, String)> = req .headers() .iter() @@ -82,11 +83,18 @@ impl MockServer { body: body_bytes, }); - // Return 200 OK + // Trace intake accepts 2xx + // Stats intake accepts 202 + // see `libdd_trace_utils::stats_utils::send_stats_payload_with_client` + let (status, body) = if is_stats_intake { + (StatusCode::ACCEPTED, http_common::Body::empty()) + } else { + (StatusCode::OK, http_common::Body::from(r#"{"ok":true}"#)) + }; Ok::<_, hyper::http::Error>( Response::builder() - .status(200) - .body(http_common::Body::from(r#"{"ok":true}"#)) + .status(status) + .body(body) .unwrap(), ) } diff --git a/crates/datadog-trace-agent/tests/common/mocks.rs b/crates/datadog-trace-agent/tests/common/mocks.rs index 842c45f0..dfd98ccd 100644 --- a/crates/datadog-trace-agent/tests/common/mocks.rs +++ b/crates/datadog-trace-agent/tests/common/mocks.rs @@ -12,6 +12,7 @@ use libdd_trace_protobuf::pb; use libdd_trace_utils::trace_utils::{self, MiniAgentMetadata, SendData}; use std::sync::Arc; use tokio::sync::mpsc::{Receiver, Sender}; +use tokio::sync::oneshot; /// Mock trace processor that returns 200 OK for all requests #[allow(dead_code)] @@ -86,6 +87,7 @@ impl StatsFlusher for MockStatsFlusher { &self, _config: Arc, mut stats_rx: Receiver, + _shutdown_rx: oneshot::Receiver<()>, ) { // Consume messages from the channel without processing them while let Some(_stats) = stats_rx.recv().await { @@ -93,7 +95,12 @@ impl StatsFlusher for MockStatsFlusher { } } - async fn flush_stats(&self, _config: Arc, _traces: Vec) { + async fn flush_stats( + &self, + _config: Arc, + _traces: Vec, + _force_flush: bool, + ) { // Do nothing } } diff --git a/crates/datadog-trace-agent/tests/integration_test.rs b/crates/datadog-trace-agent/tests/integration_test.rs index 1491954f..358f98cf 100644 --- a/crates/datadog-trace-agent/tests/integration_test.rs +++ b/crates/datadog-trace-agent/tests/integration_test.rs @@ -28,7 +28,7 @@ const FLUSH_WAIT_DURATION: Duration = Duration::from_millis(1500); /// Helper to configure a config with mock server endpoints pub fn configure_mock_endpoints(config: &mut Config, mock_server_url: &str) { let trace_url = format!("{}/api/v0.2/traces", mock_server_url); - let stats_url = format!("{}/api/v0.6/stats", mock_server_url); + let stats_url = format!("{}/api/v0.2/stats", mock_server_url); config.trace_intake = libdd_common::Endpoint { url: trace_url.parse().unwrap(), @@ -47,20 +47,30 @@ pub fn configure_mock_endpoints(config: &mut Config, mock_server_url: &str) { /// Helper to create a mini agent with real flushers pub fn create_mini_agent_with_real_flushers(config: Arc) -> MiniAgent { use datadog_trace_agent::{ - aggregator::TraceAggregator, stats_flusher::ServerlessStatsFlusher, + aggregator::TraceAggregator, stats_concentrator_service::StatsConcentratorService, + stats_flusher::ServerlessStatsFlusher, stats_generator::StatsGenerator, stats_processor::ServerlessStatsProcessor, trace_flusher::ServerlessTraceFlusher, }; + let (service, stats_concentrator_handle) = StatsConcentratorService::new(config.clone()); + tokio::spawn(service.run()); + + let stats_generator = Some(Arc::new(StatsGenerator::new( + stats_concentrator_handle.clone(), + ))); + let aggregator = Arc::new(tokio::sync::Mutex::new(TraceAggregator::default())); MiniAgent { config: config.clone(), - trace_processor: Arc::new(ServerlessTraceProcessor {}), + trace_processor: Arc::new(ServerlessTraceProcessor { stats_generator }), trace_flusher: Arc::new(ServerlessTraceFlusher::new( aggregator.clone(), config.clone(), )), stats_processor: Arc::new(ServerlessStatsProcessor {}), - stats_flusher: Arc::new(ServerlessStatsFlusher {}), + stats_flusher: Arc::new(ServerlessStatsFlusher { + stats_concentrator: Some(stats_concentrator_handle), + }), env_verifier: Arc::new(MockEnvVerifier), proxy_flusher: Arc::new(ProxyFlusher::new(config.clone())), } @@ -102,6 +112,52 @@ pub fn verify_trace_request(mock_server: &common::mock_server::MockServer) { ); } +/// Helper to verify stats request sent to mock server +pub fn verify_stats_request(mock_server: &common::mock_server::MockServer) { + let stats_reqs = mock_server.get_requests_for_path("/api/v0.2/stats"); + + assert!( + !stats_reqs.is_empty(), + "Expected at least one stats request to mock server" + ); + + let stats_req = &stats_reqs[0]; + assert_eq!(stats_req.method, "POST", "Expected POST method"); + + let content_type = stats_req + .headers + .iter() + .find(|(k, _)| k.to_lowercase() == "content-type") + .map(|(_, v)| v.as_str()); + assert_eq!( + content_type, + Some("application/msgpack"), + "Expected msgpack content-type" + ); + + let api_key = stats_req + .headers + .iter() + .find(|(k, _)| k.to_lowercase() == "dd-api-key") + .map(|(_, v)| v.as_str()); + assert_eq!(api_key, Some("test-api-key"), "Expected API key header"); + + assert!( + !stats_req.body.is_empty(), + "Expected non-empty stats payload" + ); +} + +/// Helper to verify stats request was not sent to mock server +pub fn verify_no_stats_request(mock_server: &common::mock_server::MockServer) { + let stats_reqs = mock_server.get_requests_for_path("/api/v0.2/stats"); + assert!( + stats_reqs.is_empty(), + "Expected no stats request to mock server, received {} request(s)", + stats_reqs.len() + ); +} + #[cfg(test)] #[tokio::test] #[serial] @@ -110,7 +166,9 @@ async fn test_mini_agent_tcp_handles_requests() { let test_port = config.dd_apm_receiver_port; let mini_agent = MiniAgent { config: config.clone(), - trace_processor: Arc::new(ServerlessTraceProcessor {}), + trace_processor: Arc::new(ServerlessTraceProcessor { + stats_generator: None, + }), trace_flusher: Arc::new(MockTraceFlusher), stats_processor: Arc::new(MockStatsProcessor), stats_flusher: Arc::new(MockStatsFlusher), @@ -120,14 +178,15 @@ async fn test_mini_agent_tcp_handles_requests() { // Start the mini agent let agent_handle = tokio::spawn(async move { - let _ = mini_agent.start_mini_agent().await; + let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + let _ = mini_agent.start_mini_agent(shutdown_rx).await; }); // Give server time to start tokio::time::sleep(Duration::from_millis(100)).await; // Test /info endpoint - let info_response = send_tcp_request(test_port, "/info", "GET", None) + let info_response = send_tcp_request(test_port, "/info", "GET", None, &[]) .await .expect("Failed to send /info request"); assert_eq!( @@ -160,8 +219,8 @@ async fn test_mini_agent_tcp_handles_requests() { // Check client_drop_p0s flag assert_eq!( - json["client_drop_p0s"], true, - "Expected client_drop_p0s to be true" + json["client_drop_p0s"], false, + "Expected client_drop_p0s to be false" ); // Check config object @@ -181,9 +240,10 @@ async fn test_mini_agent_tcp_handles_requests() { // Test /v0.4/traces endpoint with real trace data let trace_payload = create_test_trace_payload(); - let trace_response = send_tcp_request(test_port, "/v0.4/traces", "POST", Some(trace_payload)) - .await - .expect("Failed to send /v0.4/traces request"); + let trace_response = + send_tcp_request(test_port, "/v0.4/traces", "POST", Some(trace_payload), &[]) + .await + .expect("Failed to send /v0.4/traces request"); assert_eq!( trace_response.status(), StatusCode::OK, @@ -206,7 +266,9 @@ async fn test_mini_agent_named_pipe_handles_requests() { let mini_agent = MiniAgent { config: config.clone(), - trace_processor: Arc::new(ServerlessTraceProcessor {}), + trace_processor: Arc::new(ServerlessTraceProcessor { + stats_generator: None, + }), trace_flusher: Arc::new(MockTraceFlusher), stats_processor: Arc::new(MockStatsProcessor), stats_flusher: Arc::new(MockStatsFlusher), @@ -216,7 +278,8 @@ async fn test_mini_agent_named_pipe_handles_requests() { // Start the mini agent let agent_handle = tokio::spawn(async move { - let _ = mini_agent.start_mini_agent().await; + let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + let _ = mini_agent.start_mini_agent(shutdown_rx).await; }); // Give server time to create pipe @@ -287,15 +350,72 @@ async fn test_mini_agent_tcp_with_real_flushers() { let mini_agent = create_mini_agent_with_real_flushers(config); + let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + let agent_handle = tokio::spawn(async move { + let _ = mini_agent.start_mini_agent(shutdown_rx).await; + }); + + // Wait for server to be ready + let mut server_ready = false; + for _ in 0..20 { + tokio::time::sleep(Duration::from_millis(50)).await; + if let Ok(response) = send_tcp_request(test_port, "/info", "GET", None, &[]).await { + if response.status().is_success() { + server_ready = true; + break; + } + } + } + assert!( + server_ready, + "Mini agent server failed to start within timeout" + ); + + // Send trace data + let trace_payload = create_test_trace_payload(); + let trace_response = + send_tcp_request(test_port, "/v0.4/traces", "POST", Some(trace_payload), &[]) + .await + .expect("Failed to send /v0.4/traces request"); + assert_eq!(trace_response.status(), StatusCode::OK); + + // Wait for trace flush + tokio::time::sleep(FLUSH_WAIT_DURATION).await; + verify_trace_request(&mock_server); + + // Trigger shutdown to force flush in progress concentrator buckets + let _ = shutdown_tx.send(()); + tokio::time::sleep(FLUSH_WAIT_DURATION).await; + verify_stats_request(&mock_server); // Stats generator should generate stats from trace payload + + // Clean up + agent_handle.abort(); +} + +#[cfg(test)] +#[tokio::test] +#[serial] +async fn test_mini_agent_tcp_with_real_flushers_and_tracer_computed_stats() { + let mock_server: MockServer = MockServer::start().await; + tokio::time::sleep(Duration::from_millis(50)).await; + + let mut config = create_tcp_test_config(8128); // use different port to avoid race condition with other tests + configure_mock_endpoints(&mut config, &mock_server.url()); + let config = Arc::new(config); + let test_port = config.dd_apm_receiver_port; + + let mini_agent = create_mini_agent_with_real_flushers(config); + let agent_handle = tokio::spawn(async move { - let _ = mini_agent.start_mini_agent().await; + let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + let _ = mini_agent.start_mini_agent(shutdown_rx).await; }); // Wait for server to be ready let mut server_ready = false; for _ in 0..20 { tokio::time::sleep(Duration::from_millis(50)).await; - if let Ok(response) = send_tcp_request(test_port, "/info", "GET", None).await + if let Ok(response) = send_tcp_request(test_port, "/info", "GET", None, &[]).await && response.status().is_success() { server_ready = true; @@ -309,16 +429,24 @@ async fn test_mini_agent_tcp_with_real_flushers() { // Send trace data let trace_payload = create_test_trace_payload(); - let trace_response = send_tcp_request(test_port, "/v0.4/traces", "POST", Some(trace_payload)) - .await - .expect("Failed to send /v0.4/traces request"); + let trace_response = send_tcp_request( + test_port, + "/v0.4/traces", + "POST", + Some(trace_payload), + &[("Datadog-Client-Computed-Stats", "true")], + ) + .await + .expect("Failed to send /v0.4/traces request"); assert_eq!(trace_response.status(), StatusCode::OK); // Wait for flush tokio::time::sleep(FLUSH_WAIT_DURATION).await; verify_trace_request(&mock_server); + verify_no_stats_request(&mock_server); // Stats generator should not generate stats from trace payload when Datadog-Client-Computed-Stats header is present in trace payload + // Clean up agent_handle.abort(); } @@ -338,8 +466,9 @@ async fn test_mini_agent_named_pipe_with_real_flushers() { let mini_agent = create_mini_agent_with_real_flushers(config); + let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); let agent_handle = tokio::spawn(async move { - let _ = mini_agent.start_mini_agent().await; + let _ = mini_agent.start_mini_agent(shutdown_rx).await; }); // Wait for server to be ready @@ -366,10 +495,15 @@ async fn test_mini_agent_named_pipe_with_real_flushers() { .expect("Failed to send /v0.4/traces request over named pipe"); assert_eq!(trace_response.status(), StatusCode::OK); - // Wait for flush + // Wait for trace flush tokio::time::sleep(FLUSH_WAIT_DURATION).await; - verify_trace_request(&mock_server); + // Trigger shutdown to force flush in progress concentrator buckets + let _ = shutdown_tx.send(()); + tokio::time::sleep(FLUSH_WAIT_DURATION).await; + verify_stats_request(&mock_server); + + // Clean up agent_handle.abort(); } From 005ef6b539c58f0fe2babe52d6a588d746124614 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Mon, 20 Apr 2026 10:42:28 -0400 Subject: [PATCH 02/21] remove channel depth debug log --- .../src/stats_concentrator_service.rs | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/crates/datadog-trace-agent/src/stats_concentrator_service.rs b/crates/datadog-trace-agent/src/stats_concentrator_service.rs index 669e13f7..8ddac1e8 100644 --- a/crates/datadog-trace-agent/src/stats_concentrator_service.rs +++ b/crates/datadog-trace-agent/src/stats_concentrator_service.rs @@ -32,16 +32,14 @@ pub enum ConcentratorCommand { #[derive(Clone)] pub struct StatsConcentratorHandle { tx: mpsc::UnboundedSender, - channel_depth: Arc, } impl StatsConcentratorHandle { #[must_use] pub fn new( tx: mpsc::UnboundedSender, - channel_depth: Arc, ) -> Self { - Self { tx, channel_depth } + Self { tx } } /// Adds a trace chunk for stats computation. @@ -50,22 +48,18 @@ impl StatsConcentratorHandle { chunk: TraceChunk, metadata: Arc, ) -> Result<(), StatsError> { - self.channel_depth.fetch_add(1, Ordering::Relaxed); self.tx .send(ConcentratorCommand::AddChunk(Box::new(chunk), metadata)) .map_err(|e| { - self.channel_depth.fetch_sub(1, Ordering::Relaxed); StatsError::SendError(Box::new(e)) }) } pub async fn flush(&self, force_flush: bool) -> Result, StatsError> { let (response_tx, response_rx) = oneshot::channel(); - self.channel_depth.fetch_add(1, Ordering::Relaxed); self.tx .send(ConcentratorCommand::Flush(force_flush, response_tx)) .map_err(|e| { - self.channel_depth.fetch_sub(1, Ordering::Relaxed); StatsError::SendError(Box::new(e)) })?; response_rx.await.map_err(StatsError::RecvError) @@ -77,15 +71,13 @@ pub struct StatsConcentratorService { rx: mpsc::UnboundedReceiver, tracer_metadata: Option>, config: Arc, - channel_depth: Arc, } impl StatsConcentratorService { #[must_use] pub fn new(config: Arc) -> (Self, StatsConcentratorHandle) { let (tx, rx) = mpsc::unbounded_channel(); - let channel_depth = Arc::new(AtomicUsize::new(0)); - let handle = StatsConcentratorHandle::new(tx, Arc::clone(&channel_depth)); + let handle = StatsConcentratorHandle::new(tx); // TODO: set span_kinds_stats_computed and peer_tag_keys let concentrator = SpanConcentrator::new( Duration::from_nanos(BUCKET_DURATION_NS), @@ -98,7 +90,6 @@ impl StatsConcentratorService { rx, tracer_metadata: None, config, - channel_depth, }; (service, handle) } @@ -107,7 +98,6 @@ impl StatsConcentratorService { while let Some(command) = self.rx.recv().await { match command { ConcentratorCommand::AddChunk(chunk, metadata) => { - self.channel_depth.fetch_sub(1, Ordering::Relaxed); if self.tracer_metadata.is_none() { self.tracer_metadata = Some(metadata); } @@ -116,8 +106,6 @@ impl StatsConcentratorService { } } ConcentratorCommand::Flush(force_flush, response_tx) => { - let depth = self.channel_depth.fetch_sub(1, Ordering::Relaxed) - 1; - debug!(channel_depth = depth, "Stats concentrator channel depth"); self.handle_flush(force_flush, response_tx); } } From 10f92e662e947c727a121c3c2f790148ad1e805e Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Mon, 20 Apr 2026 10:43:49 -0400 Subject: [PATCH 03/21] fmt and clippy --- .../src/stats_concentrator_service.rs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/crates/datadog-trace-agent/src/stats_concentrator_service.rs b/crates/datadog-trace-agent/src/stats_concentrator_service.rs index 8ddac1e8..af052c87 100644 --- a/crates/datadog-trace-agent/src/stats_concentrator_service.rs +++ b/crates/datadog-trace-agent/src/stats_concentrator_service.rs @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 use std::sync::Arc; -use std::sync::atomic::{AtomicUsize, Ordering}; use tokio::sync::{mpsc, oneshot}; use crate::config::Config; @@ -10,7 +9,7 @@ use libdd_library_config::tracer_metadata::TracerMetadata; use libdd_trace_protobuf::pb::{ClientStatsPayload, TraceChunk}; use libdd_trace_stats::span_concentrator::SpanConcentrator; use std::time::{Duration, SystemTime}; -use tracing::{debug, error}; +use tracing::error; const S_TO_NS: u64 = 1_000_000_000; const BUCKET_DURATION_NS: u64 = 10 * S_TO_NS; // 10 seconds @@ -36,9 +35,7 @@ pub struct StatsConcentratorHandle { impl StatsConcentratorHandle { #[must_use] - pub fn new( - tx: mpsc::UnboundedSender, - ) -> Self { + pub fn new(tx: mpsc::UnboundedSender) -> Self { Self { tx } } @@ -50,18 +47,14 @@ impl StatsConcentratorHandle { ) -> Result<(), StatsError> { self.tx .send(ConcentratorCommand::AddChunk(Box::new(chunk), metadata)) - .map_err(|e| { - StatsError::SendError(Box::new(e)) - }) + .map_err(|e| StatsError::SendError(Box::new(e))) } pub async fn flush(&self, force_flush: bool) -> Result, StatsError> { let (response_tx, response_rx) = oneshot::channel(); self.tx .send(ConcentratorCommand::Flush(force_flush, response_tx)) - .map_err(|e| { - StatsError::SendError(Box::new(e)) - })?; + .map_err(|e| StatsError::SendError(Box::new(e)))?; response_rx.await.map_err(StatsError::RecvError) } } From 2f15d510a18f199d5b4b9f302ec3a85b5677a7a9 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Mon, 20 Apr 2026 12:32:30 -0400 Subject: [PATCH 04/21] add stats concentrator service handle for unexpected background service terminations --- crates/datadog-serverless-compat/src/main.rs | 32 ++++--- crates/datadog-trace-agent/src/mini_agent.rs | 23 +++++ .../tests/integration_test.rs | 85 ++++++++++++++++--- 3 files changed, 114 insertions(+), 26 deletions(-) diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 0aae8072..97648d47 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -159,19 +159,21 @@ pub async fn main() { } }; - let (stats_concentrator_handle, stats_generator) = if dd_serverless_stats_computation_enabled { - info!("serverless stats computation enabled"); - let (service, handle) = - stats_concentrator_service::StatsConcentratorService::new(config.clone()); - tokio::spawn(service.run()); - ( - Some(handle.clone()), - Some(Arc::new(stats_generator::StatsGenerator::new(handle))), - ) - } else { - info!("serverless stats computation disabled"); - (None, None) - }; + let (stats_concentrator_handle, stats_generator, stats_concentrator_service_handle) = + if dd_serverless_stats_computation_enabled { + info!("serverless stats computation enabled"); + let (service, handle) = + stats_concentrator_service::StatsConcentratorService::new(config.clone()); + let task = tokio::spawn(service.run()); + ( + Some(handle.clone()), + Some(Arc::new(stats_generator::StatsGenerator::new(handle))), + Some(task), + ) + } else { + info!("serverless stats computation disabled"); + (None, None, None) + }; let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { stats_generator: stats_generator.clone(), @@ -202,7 +204,9 @@ pub async fn main() { let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); tokio::spawn(async move { - let res = mini_agent.start_mini_agent(shutdown_rx).await; + let res = mini_agent + .start_mini_agent(shutdown_rx, stats_concentrator_service_handle) + .await; if let Err(e) = res { error!("Error when starting serverless trace mini agent: {e:?}"); } diff --git a/crates/datadog-trace-agent/src/mini_agent.rs b/crates/datadog-trace-agent/src/mini_agent.rs index 8ed2ce67..ac9c60d9 100644 --- a/crates/datadog-trace-agent/src/mini_agent.rs +++ b/crates/datadog-trace-agent/src/mini_agent.rs @@ -53,6 +53,7 @@ impl MiniAgent { pub async fn start_mini_agent( &self, shutdown_rx: tokio::sync::oneshot::Receiver<()>, + stats_concentrator_service_handle: Option>, ) -> Result<(), Box> { let now = Instant::now(); @@ -166,6 +167,7 @@ impl MiniAgent { service, trace_flusher_handle, stats_flusher_handle, + stats_concentrator_service_handle, ) .await?; } @@ -218,6 +220,7 @@ impl MiniAgent { service, trace_flusher_handle, stats_flusher_handle, + stats_concentrator_service_handle, ) .await?; } @@ -230,6 +233,7 @@ impl MiniAgent { service: S, mut trace_flusher_handle: tokio::task::JoinHandle<()>, mut stats_flusher_handle: tokio::task::JoinHandle<()>, + mut stats_concentrator_service_handle: Option>, ) -> Result<(), Box> where S: hyper::service::Service< @@ -283,6 +287,15 @@ impl MiniAgent { error!("Stats flusher task died: {:?}", result); return Err("Stats flusher task terminated unexpectedly".into()); }, + result = async { + match stats_concentrator_service_handle { + Some(ref mut h) => h.await, + None => std::future::pending().await, + } + } => { + error!("Stats concentrator service task died: {:?}", result); + return Err("Stats concentrator service task terminated unexpectedly".into()); + }, }; let conn = hyper_util::rt::TokioIo::new(conn); let server = server.clone(); @@ -301,6 +314,7 @@ impl MiniAgent { service: S, mut trace_flusher_handle: tokio::task::JoinHandle<()>, mut stats_flusher_handle: tokio::task::JoinHandle<()>, + mut stats_concentrator_service_handle: Option>, ) -> Result<(), Box> where S: hyper::service::Service< @@ -371,6 +385,15 @@ impl MiniAgent { error!("Stats flusher task died: {:?}", result); return Err("Stats flusher task terminated unexpectedly".into()); }, + result = async { + match stats_concentrator_service_handle { + Some(ref mut h) => h.await, + None => std::future::pending().await, + } + } => { + error!("Stats concentrator task died: {:?}", result); + return Err("Stats concentrator task terminated unexpectedly".into()); + }, }; // Hyper http parser handles buffering pipe data diff --git a/crates/datadog-trace-agent/tests/integration_test.rs b/crates/datadog-trace-agent/tests/integration_test.rs index 358f98cf..23baab3e 100644 --- a/crates/datadog-trace-agent/tests/integration_test.rs +++ b/crates/datadog-trace-agent/tests/integration_test.rs @@ -45,7 +45,9 @@ pub fn configure_mock_endpoints(config: &mut Config, mock_server_url: &str) { } /// Helper to create a mini agent with real flushers -pub fn create_mini_agent_with_real_flushers(config: Arc) -> MiniAgent { +pub fn create_mini_agent_with_real_flushers( + config: Arc, +) -> (MiniAgent, tokio::task::JoinHandle<()>) { use datadog_trace_agent::{ aggregator::TraceAggregator, stats_concentrator_service::StatsConcentratorService, stats_flusher::ServerlessStatsFlusher, stats_generator::StatsGenerator, @@ -53,14 +55,14 @@ pub fn create_mini_agent_with_real_flushers(config: Arc) -> MiniAgent { }; let (service, stats_concentrator_handle) = StatsConcentratorService::new(config.clone()); - tokio::spawn(service.run()); + let stats_concentrator_service_handle = tokio::spawn(service.run()); let stats_generator = Some(Arc::new(StatsGenerator::new( stats_concentrator_handle.clone(), ))); let aggregator = Arc::new(tokio::sync::Mutex::new(TraceAggregator::default())); - MiniAgent { + let mini_agent = MiniAgent { config: config.clone(), trace_processor: Arc::new(ServerlessTraceProcessor { stats_generator }), trace_flusher: Arc::new(ServerlessTraceFlusher::new( @@ -73,7 +75,8 @@ pub fn create_mini_agent_with_real_flushers(config: Arc) -> MiniAgent { }), env_verifier: Arc::new(MockEnvVerifier), proxy_flusher: Arc::new(ProxyFlusher::new(config.clone())), - } + }; + (mini_agent, stats_concentrator_service_handle) } /// Helper to verify trace request sent to mock server @@ -179,7 +182,7 @@ async fn test_mini_agent_tcp_handles_requests() { // Start the mini agent let agent_handle = tokio::spawn(async move { let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); - let _ = mini_agent.start_mini_agent(shutdown_rx).await; + let _ = mini_agent.start_mini_agent(shutdown_rx, None).await; }); // Give server time to start @@ -279,7 +282,7 @@ async fn test_mini_agent_named_pipe_handles_requests() { // Start the mini agent let agent_handle = tokio::spawn(async move { let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); - let _ = mini_agent.start_mini_agent(shutdown_rx).await; + let _ = mini_agent.start_mini_agent(shutdown_rx, None).await; }); // Give server time to create pipe @@ -348,11 +351,14 @@ async fn test_mini_agent_tcp_with_real_flushers() { let config = Arc::new(config); let test_port = config.dd_apm_receiver_port; - let mini_agent = create_mini_agent_with_real_flushers(config); + let (mini_agent, stats_concentrator_service_handle) = + create_mini_agent_with_real_flushers(config); let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); let agent_handle = tokio::spawn(async move { - let _ = mini_agent.start_mini_agent(shutdown_rx).await; + let _ = mini_agent + .start_mini_agent(shutdown_rx, Some(stats_concentrator_service_handle)) + .await; }); // Wait for server to be ready @@ -392,6 +398,59 @@ async fn test_mini_agent_tcp_with_real_flushers() { agent_handle.abort(); } +#[cfg(test)] +#[tokio::test] +#[serial] +async fn test_concentrator_task_death_shuts_down_mini_agent() { + let mock_server: MockServer = MockServer::start().await; + tokio::time::sleep(Duration::from_millis(50)).await; + + let mut config = create_tcp_test_config(8129); + configure_mock_endpoints(&mut config, &mock_server.url()); + let config = Arc::new(config); + let test_port = config.dd_apm_receiver_port; + + let (mini_agent, stats_concentrator_service_handle) = + create_mini_agent_with_real_flushers(config); + let abort_handle = stats_concentrator_service_handle.abort_handle(); + + let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + let agent_handle = tokio::spawn(async move { + mini_agent + .start_mini_agent(shutdown_rx, Some(stats_concentrator_service_handle)) + .await + .map_err(|e| e.to_string()) + }); + + // Wait for server to be ready + let mut server_ready = false; + for _ in 0..20 { + tokio::time::sleep(Duration::from_millis(50)).await; + if let Ok(response) = send_tcp_request(test_port, "/info", "GET", None, &[]).await + && response.status().is_success() + { + server_ready = true; + break; + } + } + assert!( + server_ready, + "Mini agent server failed to start within timeout" + ); + + // Kill the concentrator task to simulate unexpected task death + abort_handle.abort(); + + // Mini agent should detect the task death and exit with an error + let result = tokio::time::timeout(Duration::from_secs(2), agent_handle) + .await + .expect("mini agent should have exited after concentrator task death"); + assert!( + result.expect("agent task should not panic").is_err(), + "mini agent should return an error when the concentrator task dies" + ); +} + #[cfg(test)] #[tokio::test] #[serial] @@ -404,11 +463,12 @@ async fn test_mini_agent_tcp_with_real_flushers_and_tracer_computed_stats() { let config = Arc::new(config); let test_port = config.dd_apm_receiver_port; - let mini_agent = create_mini_agent_with_real_flushers(config); + let (mini_agent, _stats_concentrator_service_handle) = + create_mini_agent_with_real_flushers(config); let agent_handle = tokio::spawn(async move { let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); - let _ = mini_agent.start_mini_agent(shutdown_rx).await; + let _ = mini_agent.start_mini_agent(shutdown_rx, None).await; }); // Wait for server to be ready @@ -464,11 +524,12 @@ async fn test_mini_agent_named_pipe_with_real_flushers() { config.dd_apm_receiver_port = 0; let config = Arc::new(config); - let mini_agent = create_mini_agent_with_real_flushers(config); + let (mini_agent, _stats_concentrator_service_handle) = + create_mini_agent_with_real_flushers(config); let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); let agent_handle = tokio::spawn(async move { - let _ = mini_agent.start_mini_agent(shutdown_rx).await; + let _ = mini_agent.start_mini_agent(shutdown_rx, None).await; }); // Wait for server to be ready From 85c33e26ce90666a04e95b368ce799430c2d5258 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Mon, 20 Apr 2026 13:04:03 -0400 Subject: [PATCH 05/21] drain in flight handlers prior to shutdown --- crates/datadog-trace-agent/src/mini_agent.rs | 53 ++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/crates/datadog-trace-agent/src/mini_agent.rs b/crates/datadog-trace-agent/src/mini_agent.rs index ac9c60d9..ccbaa170 100644 --- a/crates/datadog-trace-agent/src/mini_agent.rs +++ b/crates/datadog-trace-agent/src/mini_agent.rs @@ -11,6 +11,7 @@ use std::net::SocketAddr; use std::sync::Arc; use std::time::Instant; use tokio::sync::mpsc::{self, Receiver, Sender}; +use tokio::sync::oneshot; use tracing::{debug, error}; use crate::http_utils::{log_and_create_http_response, verify_request_content_length}; @@ -92,12 +93,17 @@ impl MiniAgent { Receiver, ) = mpsc::channel(STATS_PAYLOAD_CHANNEL_BUFFER_SIZE); + // Create a separate shutdown channel for the stats flusher so that serve_tcp + // can drain all in-flight HTTP handlers before triggering the final flush, + // preventing AddChunk/ClientStatsPayload messages from being missed. + let (flusher_shutdown_tx, flusher_shutdown_rx) = oneshot::channel::<()>(); + // start our stats flusher. let stats_flusher = self.stats_flusher.clone(); let stats_config = self.config.clone(); let stats_flusher_handle = tokio::spawn(async move { stats_flusher - .start_stats_flusher(stats_config, stats_rx, shutdown_rx) + .start_stats_flusher(stats_config, stats_rx, flusher_shutdown_rx) .await; }); @@ -168,6 +174,8 @@ impl MiniAgent { trace_flusher_handle, stats_flusher_handle, stats_concentrator_service_handle, + shutdown_rx, + flusher_shutdown_tx, ) .await?; } @@ -221,6 +229,8 @@ impl MiniAgent { trace_flusher_handle, stats_flusher_handle, stats_concentrator_service_handle, + shutdown_rx, + flusher_shutdown_tx, ) .await?; } @@ -232,8 +242,10 @@ impl MiniAgent { listener: tokio::net::TcpListener, service: S, mut trace_flusher_handle: tokio::task::JoinHandle<()>, - mut stats_flusher_handle: tokio::task::JoinHandle<()>, + stats_flusher_handle: tokio::task::JoinHandle<()>, mut stats_concentrator_service_handle: Option>, + mut shutdown_rx: oneshot::Receiver<()>, + flusher_shutdown_tx: oneshot::Sender<()>, ) -> Result<(), Box> where S: hyper::service::Service< @@ -247,6 +259,7 @@ impl MiniAgent { { let server = hyper::server::conn::http1::Builder::new(); let mut joinset = tokio::task::JoinSet::new(); + let mut stats_flusher_handle = stats_flusher_handle; loop { let conn = tokio::select! { @@ -296,6 +309,23 @@ impl MiniAgent { error!("Stats concentrator service task died: {:?}", result); return Err("Stats concentrator service task terminated unexpectedly".into()); }, + _ = &mut shutdown_rx => { + // Drain all in-flight connections so every handler has finished + // writing to the stats/trace channels before we trigger the flush. + while let Some(result) = joinset.join_next().await { + if let Err(e) = result + && e.is_panic() { + std::panic::resume_unwind(e.into_panic()); + } + } + // Signal the stats flusher to force-flush now that all handlers + // have finished writing to the channel. + let _ = flusher_shutdown_tx.send(()); + if let Err(e) = stats_flusher_handle.await { + error!("Stats flusher task failed during shutdown: {e:?}"); + } + return Ok(()); + }, }; let conn = hyper_util::rt::TokioIo::new(conn); let server = server.clone(); @@ -313,8 +343,10 @@ impl MiniAgent { pipe_name: &str, service: S, mut trace_flusher_handle: tokio::task::JoinHandle<()>, - mut stats_flusher_handle: tokio::task::JoinHandle<()>, + stats_flusher_handle: tokio::task::JoinHandle<()>, mut stats_concentrator_service_handle: Option>, + mut shutdown_rx: oneshot::Receiver<()>, + flusher_shutdown_tx: oneshot::Sender<()>, ) -> Result<(), Box> where S: hyper::service::Service< @@ -328,6 +360,7 @@ impl MiniAgent { { let server = hyper::server::conn::http1::Builder::new(); let mut joinset = tokio::task::JoinSet::new(); + let mut stats_flusher_handle = stats_flusher_handle; loop { // Create a new pipe instance @@ -394,6 +427,20 @@ impl MiniAgent { error!("Stats concentrator task died: {:?}", result); return Err("Stats concentrator task terminated unexpectedly".into()); }, + _ = &mut shutdown_rx => { + while let Some(result) = joinset.join_next().await { + if let Err(e) = result { + if e.is_panic() { + std::panic::resume_unwind(e.into_panic()); + } + } + } + let _ = flusher_shutdown_tx.send(()); + if let Err(e) = stats_flusher_handle.await { + error!("Stats flusher task failed during shutdown: {e:?}"); + } + return Ok(()); + }, }; // Hyper http parser handles buffering pipe data From 1bd18c049d2d9457f9becfb95c1795073dc567c9 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Fri, 24 Apr 2026 12:45:43 -0400 Subject: [PATCH 06/21] refactor StatsGenerator into ServerlessTraceProcessor --- crates/datadog-serverless-compat/src/main.rs | 14 ++-- crates/datadog-trace-agent/src/lib.rs | 1 - .../src/stats_generator.rs | 64 ------------------- .../src/trace_processor.rs | 46 ++++++++++--- .../tests/integration_test.rs | 16 ++--- 5 files changed, 50 insertions(+), 91 deletions(-) delete mode 100644 crates/datadog-trace-agent/src/stats_generator.rs diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 97648d47..88445877 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -19,7 +19,7 @@ use zstd::zstd_safe::CompressionLevel; use datadog_trace_agent::{ aggregator::TraceAggregator, config, env_verifier, mini_agent, proxy_flusher, stats_concentrator_service, stats_flusher, - stats_generator, stats_processor, + stats_processor, trace_flusher::{self, TraceFlusher}, trace_processor, }; @@ -159,24 +159,20 @@ pub async fn main() { } }; - let (stats_concentrator_handle, stats_generator, stats_concentrator_service_handle) = + let (stats_concentrator_handle, stats_concentrator_service_handle) = if dd_serverless_stats_computation_enabled { info!("serverless stats computation enabled"); let (service, handle) = stats_concentrator_service::StatsConcentratorService::new(config.clone()); let task = tokio::spawn(service.run()); - ( - Some(handle.clone()), - Some(Arc::new(stats_generator::StatsGenerator::new(handle))), - Some(task), - ) + (Some(handle), Some(task)) } else { info!("serverless stats computation disabled"); - (None, None, None) + (None, None) }; let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { - stats_generator: stats_generator.clone(), + stats_concentrator: stats_concentrator_handle.clone(), }); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher { diff --git a/crates/datadog-trace-agent/src/lib.rs b/crates/datadog-trace-agent/src/lib.rs index daeed742..4c37f08e 100644 --- a/crates/datadog-trace-agent/src/lib.rs +++ b/crates/datadog-trace-agent/src/lib.rs @@ -15,7 +15,6 @@ pub mod mini_agent; pub mod proxy_flusher; pub mod stats_concentrator_service; pub mod stats_flusher; -pub mod stats_generator; pub mod stats_processor; pub mod trace_flusher; pub mod trace_processor; diff --git a/crates/datadog-trace-agent/src/stats_generator.rs b/crates/datadog-trace-agent/src/stats_generator.rs deleted file mode 100644 index 7e06a387..00000000 --- a/crates/datadog-trace-agent/src/stats_generator.rs +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -use std::sync::Arc; - -use crate::stats_concentrator_service::{StatsConcentratorHandle, StatsError}; -use libdd_library_config::tracer_metadata::TracerMetadata; -use libdd_trace_utils::tracer_payload::TracerPayloadCollection; -use tracing::error; - -pub struct StatsGenerator { - stats_concentrator: StatsConcentratorHandle, -} - -#[derive(Debug, thiserror::Error)] -pub enum StatsGeneratorError { - #[error("Failed to send command to stats concentrator: {0}")] - ConcentratorCommandError(StatsError), - #[error("Unsupported tracer payload version. Failed to send trace stats.")] - TracerPayloadVersionError, -} - -// Sends tracer payloads to the stats concentrator -impl StatsGenerator { - #[must_use] - pub fn new(stats_concentrator: StatsConcentratorHandle) -> Self { - Self { stats_concentrator } - } - - pub fn send( - &self, - tracer_payload_collection: &TracerPayloadCollection, - ) -> Result<(), StatsGeneratorError> { - if let TracerPayloadCollection::V07(tracer_payloads) = tracer_payload_collection { - for tracer_payload in tracer_payloads { - let metadata = Arc::new(TracerMetadata { - schema_version: 2, - runtime_id: None, - tracer_language: tracer_payload.language_name.clone(), - tracer_version: tracer_payload.tracer_version.clone(), - hostname: String::new(), - service_name: None, - service_env: Some(tracer_payload.env.clone()), - service_version: Some(tracer_payload.app_version.clone()), - process_tags: None, - container_id: Some(tracer_payload.container_id.clone()), - }); - for chunk in &tracer_payload.chunks { - if let Err(err) = self - .stats_concentrator - .add_chunk(chunk.clone(), Arc::clone(&metadata)) - { - error!("Failed to send trace chunk to concentrator: {err}"); - return Err(StatsGeneratorError::ConcentratorCommandError(err)); - } - } - } - Ok(()) - } else { - error!("Unsupported tracer payload version. Failed to send trace stats."); - Err(StatsGeneratorError::TracerPayloadVersionError) - } - } -} diff --git a/crates/datadog-trace-agent/src/trace_processor.rs b/crates/datadog-trace-agent/src/trace_processor.rs index bd385e01..c64de2f3 100644 --- a/crates/datadog-trace-agent/src/trace_processor.rs +++ b/crates/datadog-trace-agent/src/trace_processor.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use async_trait::async_trait; use hyper::{StatusCode, http}; use libdd_common::http_common; +use libdd_library_config::tracer_metadata::TracerMetadata; use tokio::sync::mpsc::Sender; use tracing::{debug, error}; @@ -18,7 +19,7 @@ use libdd_trace_utils::tracer_payload::{TraceChunkProcessor, TracerPayloadCollec use crate::{ config::Config, http_utils::{self, log_and_create_http_response, log_and_create_traces_success_http_response}, - stats_generator::StatsGenerator, + stats_concentrator_service::StatsConcentratorHandle, }; const TRACER_PAYLOAD_FUNCTION_TAGS_TAG_KEY: &str = "_dd.tags.function"; @@ -67,8 +68,38 @@ impl TraceChunkProcessor for ChunkProcessor { } #[derive(Clone)] pub struct ServerlessTraceProcessor { - /// The stats generator to use for generating stats and sending them to the stats concentrator. - pub stats_generator: Option>, + pub stats_concentrator: Option, +} + +impl ServerlessTraceProcessor { + fn send_to_concentrator( + concentrator: &StatsConcentratorHandle, + payload: &TracerPayloadCollection, + ) { + if let TracerPayloadCollection::V07(tracer_payloads) = payload { + for tracer_payload in tracer_payloads { + let metadata = Arc::new(TracerMetadata { + schema_version: 2, + runtime_id: None, + tracer_language: tracer_payload.language_name.clone(), + tracer_version: tracer_payload.tracer_version.clone(), + hostname: String::new(), + service_name: None, + service_env: Some(tracer_payload.env.clone()), + service_version: Some(tracer_payload.app_version.clone()), + process_tags: None, + container_id: Some(tracer_payload.container_id.clone()), + }); + for chunk in &tracer_payload.chunks { + if let Err(e) = concentrator.add_chunk(chunk.clone(), Arc::clone(&metadata)) { + error!("Failed to send trace chunk to concentrator: {e}"); + } + } + } + } else { + error!("Unsupported tracer payload version. Failed to send trace stats."); + } + } } #[async_trait] @@ -143,11 +174,10 @@ impl TraceProcessor for ServerlessTraceProcessor { } } - if let Some(stats_generator) = self.stats_generator.as_ref() + if let Some(ref concentrator) = self.stats_concentrator && !tracer_header_tags.client_computed_stats - && let Err(e) = stats_generator.send(&payload) { - error!("Stats generator error: {e}"); + Self::send_to_concentrator(concentrator, &payload); } let send_data = SendData::new(body_size, payload, tracer_header_tags, &config.trace_intake); @@ -268,7 +298,7 @@ mod tests { .unwrap(); let trace_processor = trace_processor::ServerlessTraceProcessor { - stats_generator: None, + stats_concentrator: None, }; let res = trace_processor .process_traces( @@ -342,7 +372,7 @@ mod tests { .unwrap(); let trace_processor = trace_processor::ServerlessTraceProcessor { - stats_generator: None, + stats_concentrator: None, }; let res = trace_processor .process_traces( diff --git a/crates/datadog-trace-agent/tests/integration_test.rs b/crates/datadog-trace-agent/tests/integration_test.rs index 23baab3e..70931523 100644 --- a/crates/datadog-trace-agent/tests/integration_test.rs +++ b/crates/datadog-trace-agent/tests/integration_test.rs @@ -50,21 +50,19 @@ pub fn create_mini_agent_with_real_flushers( ) -> (MiniAgent, tokio::task::JoinHandle<()>) { use datadog_trace_agent::{ aggregator::TraceAggregator, stats_concentrator_service::StatsConcentratorService, - stats_flusher::ServerlessStatsFlusher, stats_generator::StatsGenerator, - stats_processor::ServerlessStatsProcessor, trace_flusher::ServerlessTraceFlusher, + stats_flusher::ServerlessStatsFlusher, stats_processor::ServerlessStatsProcessor, + trace_flusher::ServerlessTraceFlusher, }; let (service, stats_concentrator_handle) = StatsConcentratorService::new(config.clone()); let stats_concentrator_service_handle = tokio::spawn(service.run()); - let stats_generator = Some(Arc::new(StatsGenerator::new( - stats_concentrator_handle.clone(), - ))); - let aggregator = Arc::new(tokio::sync::Mutex::new(TraceAggregator::default())); let mini_agent = MiniAgent { config: config.clone(), - trace_processor: Arc::new(ServerlessTraceProcessor { stats_generator }), + trace_processor: Arc::new(ServerlessTraceProcessor { + stats_concentrator: Some(stats_concentrator_handle.clone()), + }), trace_flusher: Arc::new(ServerlessTraceFlusher::new( aggregator.clone(), config.clone(), @@ -170,7 +168,7 @@ async fn test_mini_agent_tcp_handles_requests() { let mini_agent = MiniAgent { config: config.clone(), trace_processor: Arc::new(ServerlessTraceProcessor { - stats_generator: None, + stats_concentrator: None, }), trace_flusher: Arc::new(MockTraceFlusher), stats_processor: Arc::new(MockStatsProcessor), @@ -270,7 +268,7 @@ async fn test_mini_agent_named_pipe_handles_requests() { let mini_agent = MiniAgent { config: config.clone(), trace_processor: Arc::new(ServerlessTraceProcessor { - stats_generator: None, + stats_concentrator: None, }), trace_flusher: Arc::new(MockTraceFlusher), stats_processor: Arc::new(MockStatsProcessor), From 5132865f5581781d80cff51ea67c87e354e2d974 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Fri, 24 Apr 2026 12:50:31 -0400 Subject: [PATCH 07/21] remove should_flush_stats_buffer from stats_flusher --- .../datadog-trace-agent/src/stats_flusher.rs | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/crates/datadog-trace-agent/src/stats_flusher.rs b/crates/datadog-trace-agent/src/stats_flusher.rs index 2fb0597a..ff85ac54 100644 --- a/crates/datadog-trace-agent/src/stats_flusher.rs +++ b/crates/datadog-trace-agent/src/stats_flusher.rs @@ -14,14 +14,6 @@ use libdd_trace_utils::stats_utils; use crate::config::Config; use crate::stats_concentrator_service::StatsConcentratorHandle; -/// Whether the stats flusher should run `flush_stats` -fn should_flush_stats_buffer( - channel_has_tracer_stats: bool, - serverless_stats_enabled: bool, -) -> bool { - channel_has_tracer_stats || serverless_stats_enabled -} - /// Serializes and sends a single `StatsPayload` to the intake. async fn send_stats_payload(config: &Arc, payload: pb::StatsPayload) { debug!("Stats payload to be sent: {payload:?}"); @@ -94,11 +86,7 @@ impl StatsFlusher for ServerlessStatsFlusher { // Drain client stats in buffer and stats from concentrator on interval _ = interval.tick() => { let client_stats = std::mem::take(&mut buffer); - let should_flush = should_flush_stats_buffer( - !client_stats.is_empty(), - self.stats_concentrator.is_some(), - ); - if should_flush { + if !client_stats.is_empty() || self.stats_concentrator.is_some() { self.flush_stats(config.clone(), client_stats, false).await; } } @@ -143,17 +131,3 @@ impl StatsFlusher for ServerlessStatsFlusher { } } } - -#[cfg(test)] -mod tests { - use super::should_flush_stats_buffer; - - #[test] - fn should_flush_stats_buffer_all_cases() { - // (stats channel empty, serverless computed stats enabled with concentrator) - assert!(!should_flush_stats_buffer(false, false)); - assert!(should_flush_stats_buffer(true, false)); - assert!(should_flush_stats_buffer(false, true)); - assert!(should_flush_stats_buffer(true, true)); - } -} From 4bd780f42c1e1190527e004997f3ddce1acbaf0a Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Fri, 24 Apr 2026 14:00:01 -0400 Subject: [PATCH 08/21] only return errors rather than logging one error and returning another --- crates/datadog-trace-agent/src/mini_agent.rs | 22 +++++++------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/crates/datadog-trace-agent/src/mini_agent.rs b/crates/datadog-trace-agent/src/mini_agent.rs index ccbaa170..728e7dde 100644 --- a/crates/datadog-trace-agent/src/mini_agent.rs +++ b/crates/datadog-trace-agent/src/mini_agent.rs @@ -293,12 +293,10 @@ impl MiniAgent { }, // If there's some error in the background tasks, we can't send data result = &mut trace_flusher_handle => { - error!("Trace flusher task died: {:?}", result); - return Err("Trace flusher task terminated unexpectedly".into()); + return Err(format!("Trace flusher task terminated unexpectedly: {result:?}").into()); }, result = &mut stats_flusher_handle => { - error!("Stats flusher task died: {:?}", result); - return Err("Stats flusher task terminated unexpectedly".into()); + return Err(format!("Stats flusher task terminated unexpectedly: {result:?}").into()); }, result = async { match stats_concentrator_service_handle { @@ -306,8 +304,7 @@ impl MiniAgent { None => std::future::pending().await, } } => { - error!("Stats concentrator service task died: {:?}", result); - return Err("Stats concentrator service task terminated unexpectedly".into()); + return Err(format!("Stats concentrator service task terminated unexpectedly: {result:?}").into()); }, _ = &mut shutdown_rx => { // Drain all in-flight connections so every handler has finished @@ -322,7 +319,7 @@ impl MiniAgent { // have finished writing to the channel. let _ = flusher_shutdown_tx.send(()); if let Err(e) = stats_flusher_handle.await { - error!("Stats flusher task failed during shutdown: {e:?}"); + return Err(format!("Stats flusher task failed during shutdown: {e:?}").into()); } return Ok(()); }, @@ -411,12 +408,10 @@ impl MiniAgent { }, // If there's some error in the background tasks, we can't send data result = &mut trace_flusher_handle => { - error!("Trace flusher task died: {:?}", result); - return Err("Trace flusher task terminated unexpectedly".into()); + return Err(format!("Trace flusher task terminated unexpectedly: {result:?}").into()); }, result = &mut stats_flusher_handle => { - error!("Stats flusher task died: {:?}", result); - return Err("Stats flusher task terminated unexpectedly".into()); + return Err(format!("Stats flusher task terminated unexpectedly: {result:?}").into()); }, result = async { match stats_concentrator_service_handle { @@ -424,8 +419,7 @@ impl MiniAgent { None => std::future::pending().await, } } => { - error!("Stats concentrator task died: {:?}", result); - return Err("Stats concentrator task terminated unexpectedly".into()); + return Err(format!("Stats concentrator service task terminated unexpectedly: {result:?}").into()); }, _ = &mut shutdown_rx => { while let Some(result) = joinset.join_next().await { @@ -437,7 +431,7 @@ impl MiniAgent { } let _ = flusher_shutdown_tx.send(()); if let Err(e) = stats_flusher_handle.await { - error!("Stats flusher task failed during shutdown: {e:?}"); + return Err(format!("Stats flusher task failed during shutdown: {e:?}").into()); } return Ok(()); }, From 075545228dcf20cac82d229a538d3f72f7c923cd Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Fri, 24 Apr 2026 14:11:57 -0400 Subject: [PATCH 09/21] wait for the mini agent to finish shutting down in integration tests before asserting the results --- crates/datadog-trace-agent/tests/integration_test.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/crates/datadog-trace-agent/tests/integration_test.rs b/crates/datadog-trace-agent/tests/integration_test.rs index 70931523..6c5bdceb 100644 --- a/crates/datadog-trace-agent/tests/integration_test.rs +++ b/crates/datadog-trace-agent/tests/integration_test.rs @@ -389,11 +389,8 @@ async fn test_mini_agent_tcp_with_real_flushers() { // Trigger shutdown to force flush in progress concentrator buckets let _ = shutdown_tx.send(()); - tokio::time::sleep(FLUSH_WAIT_DURATION).await; + let _ = agent_handle.await; verify_stats_request(&mock_server); // Stats generator should generate stats from trace payload - - // Clean up - agent_handle.abort(); } #[cfg(test)] @@ -560,9 +557,6 @@ async fn test_mini_agent_named_pipe_with_real_flushers() { // Trigger shutdown to force flush in progress concentrator buckets let _ = shutdown_tx.send(()); - tokio::time::sleep(FLUSH_WAIT_DURATION).await; + let _ = agent_handle.await; verify_stats_request(&mock_server); - - // Clean up - agent_handle.abort(); } From e46f752adbad8b73268172b2ae8076dd33364f65 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Fri, 24 Apr 2026 14:23:17 -0400 Subject: [PATCH 10/21] add StatsConcentratorComponents struct --- crates/datadog-serverless-compat/src/main.rs | 34 ++++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 88445877..25f89520 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -50,6 +50,11 @@ const DEFAULT_DOGSTATSD_PORT: u16 = 8125; const DEFAULT_LOG_INTAKE_PORT: u16 = 10517; const AGENT_HOST: &str = "0.0.0.0"; +struct StatsConcentratorComponents { + handle: stats_concentrator_service::StatsConcentratorHandle, + service_handle: tokio::task::JoinHandle<()>, +} + #[tokio::main] pub async fn main() { let log_level = env::var("DD_LOG_LEVEL") @@ -159,24 +164,25 @@ pub async fn main() { } }; - let (stats_concentrator_handle, stats_concentrator_service_handle) = - if dd_serverless_stats_computation_enabled { - info!("serverless stats computation enabled"); - let (service, handle) = - stats_concentrator_service::StatsConcentratorService::new(config.clone()); - let task = tokio::spawn(service.run()); - (Some(handle), Some(task)) - } else { - info!("serverless stats computation disabled"); - (None, None) - }; + let stats_concentrator = if dd_serverless_stats_computation_enabled { + info!("serverless stats computation enabled"); + let (service, handle) = + stats_concentrator_service::StatsConcentratorService::new(config.clone()); + Some(StatsConcentratorComponents { + service_handle: tokio::spawn(service.run()), + handle, + }) + } else { + info!("serverless stats computation disabled"); + None + }; let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { - stats_concentrator: stats_concentrator_handle.clone(), + stats_concentrator: stats_concentrator.as_ref().map(|c| c.handle.clone()), }); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher { - stats_concentrator: stats_concentrator_handle.clone(), + stats_concentrator: stats_concentrator.as_ref().map(|c| c.handle.clone()), }); let stats_processor = Arc::new(stats_processor::ServerlessStatsProcessor {}); @@ -201,7 +207,7 @@ pub async fn main() { let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); tokio::spawn(async move { let res = mini_agent - .start_mini_agent(shutdown_rx, stats_concentrator_service_handle) + .start_mini_agent(shutdown_rx, stats_concentrator.map(|c| c.service_handle)) .await; if let Err(e) = res { error!("Error when starting serverless trace mini agent: {e:?}"); From 8e8390622924a374d48734104915e3e7bc5285d6 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Fri, 24 Apr 2026 14:25:35 -0400 Subject: [PATCH 11/21] update licenses --- LICENSE-3rdparty.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 74fa67bc..9c0a9bed 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -124,6 +124,7 @@ libdd-data-pipeline,https://github.com/DataDog/libdatadog/tree/main/libdd-data-p libdd-ddsketch,https://github.com/DataDog/libdatadog/tree/main/libdd-ddsketch,Apache-2.0,The libdd-ddsketch Authors libdd-dogstatsd-client,https://github.com/DataDog/libdatadog/tree/main/libdd-dogstatsd-client,Apache-2.0,The libdd-dogstatsd-client Authors libdd-library-config,https://github.com/DataDog/libdatadog/tree/main/libdd-library-config,Apache-2.0,The libdd-library-config Authors +libdd-shared-runtime,https://github.com/DataDog/libdatadog/tree/main/libdd-shared-runtime,Apache-2.0,The libdd-shared-runtime Authors libdd-telemetry,https://github.com/DataDog/libdatadog/tree/main/libdd-telemetry,Apache-2.0,The libdd-telemetry Authors libdd-tinybytes,https://github.com/DataDog/libdatadog/tree/main/libdd-tinybytes,Apache-2.0,The libdd-tinybytes Authors libdd-trace-normalization,https://github.com/DataDog/libdatadog/tree/main/libdd-trace-normalization,Apache-2.0,David Lee From 16044f11a3f42ac106f137b9a4afc70f3e72005e Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Fri, 24 Apr 2026 14:43:36 -0400 Subject: [PATCH 12/21] rename DD_SERVERLESS_STATS_COMPUTATION_ENABLED to DD_AGENT_STATS_COMPUTATION_ENABLED --- crates/datadog-serverless-compat/src/main.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 25f89520..bd5f2337 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -126,10 +126,9 @@ pub async fn main() { .and_then(|v| v.parse::().ok()) .unwrap_or(DEFAULT_LOG_INTAKE_PORT); - let dd_serverless_stats_computation_enabled = - env::var("DD_SERVERLESS_STATS_COMPUTATION_ENABLED") - .map(|val| val.to_lowercase() != "false") - .unwrap_or(true); + let dd_agent_stats_computation_enabled = env::var("DD_AGENT_STATS_COMPUTATION_ENABLED") + .map(|val| val.to_lowercase() != "false") + .unwrap_or(true); debug!("Starting serverless trace mini agent"); @@ -164,8 +163,8 @@ pub async fn main() { } }; - let stats_concentrator = if dd_serverless_stats_computation_enabled { - info!("serverless stats computation enabled"); + let stats_concentrator = if dd_agent_stats_computation_enabled { + info!("agent stats computation enabled"); let (service, handle) = stats_concentrator_service::StatsConcentratorService::new(config.clone()); Some(StatsConcentratorComponents { @@ -173,7 +172,7 @@ pub async fn main() { handle, }) } else { - info!("serverless stats computation disabled"); + info!("agent stats computation disabled"); None }; From 8b2e49f7c9cb53882a9725bb9821d15ee364da01 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Fri, 24 Apr 2026 15:18:31 -0400 Subject: [PATCH 13/21] check for multiple tracers in stats concentrator service --- Cargo.lock | 28 +++---- crates/datadog-agent-config/Cargo.toml | 4 +- crates/datadog-serverless-compat/Cargo.toml | 2 +- crates/datadog-trace-agent/Cargo.toml | 18 ++--- .../src/stats_concentrator_service.rs | 77 ++++++++++++++++++- 5 files changed, 101 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 46feb1f2..1627f9d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1434,7 +1434,7 @@ checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libdd-capabilities" version = "0.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "anyhow", "bytes", @@ -1445,7 +1445,7 @@ dependencies = [ [[package]] name = "libdd-capabilities-impl" version = "0.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "bytes", "http", @@ -1488,7 +1488,7 @@ dependencies = [ [[package]] name = "libdd-common" version = "3.0.2" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "anyhow", "bytes", @@ -1561,7 +1561,7 @@ dependencies = [ [[package]] name = "libdd-ddsketch" version = "1.0.1" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "prost 0.14.3", ] @@ -1583,7 +1583,7 @@ dependencies = [ [[package]] name = "libdd-library-config" version = "1.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "anyhow", "libc", @@ -1600,7 +1600,7 @@ dependencies = [ [[package]] name = "libdd-shared-runtime" version = "0.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "async-trait", "futures", @@ -1648,7 +1648,7 @@ dependencies = [ [[package]] name = "libdd-tinybytes" version = "1.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "serde", ] @@ -1666,7 +1666,7 @@ dependencies = [ [[package]] name = "libdd-trace-normalization" version = "2.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "anyhow", "libdd-trace-protobuf 3.0.1", @@ -1675,7 +1675,7 @@ dependencies = [ [[package]] name = "libdd-trace-obfuscation" version = "2.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "anyhow", "fluent-uri", @@ -1703,7 +1703,7 @@ dependencies = [ [[package]] name = "libdd-trace-protobuf" version = "3.0.1" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "prost 0.14.3", "serde", @@ -1725,7 +1725,7 @@ dependencies = [ [[package]] name = "libdd-trace-stats" version = "2.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "anyhow", "async-trait", @@ -1734,7 +1734,7 @@ dependencies = [ "libdd-capabilities", "libdd-capabilities-impl", "libdd-common 3.0.2", - "libdd-ddsketch 1.0.1 (git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a)", + "libdd-ddsketch 1.0.1 (git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd)", "libdd-shared-runtime", "libdd-trace-protobuf 3.0.1", "libdd-trace-utils 3.0.1", @@ -1776,7 +1776,7 @@ dependencies = [ [[package]] name = "libdd-trace-utils" version = "3.0.1" -source = "git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a#27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" +source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" dependencies = [ "anyhow", "base64 0.22.1", @@ -1795,7 +1795,7 @@ dependencies = [ "libdd-capabilities", "libdd-capabilities-impl", "libdd-common 3.0.2", - "libdd-tinybytes 1.1.0 (git+https://github.com/DataDog/libdatadog?rev=27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a)", + "libdd-tinybytes 1.1.0 (git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd)", "libdd-trace-normalization 2.0.0", "libdd-trace-protobuf 3.0.1", "prost 0.14.3", diff --git a/crates/datadog-agent-config/Cargo.toml b/crates/datadog-agent-config/Cargo.toml index 75da8de0..a20a49d7 100644 --- a/crates/datadog-agent-config/Cargo.toml +++ b/crates/datadog-agent-config/Cargo.toml @@ -6,8 +6,8 @@ license.workspace = true [dependencies] figment = { version = "0.10", default-features = false, features = ["yaml", "env"] } -libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } +libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } log = { version = "0.4", default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } serde-aux = { version = "4.7", default-features = false } diff --git a/crates/datadog-serverless-compat/Cargo.toml b/crates/datadog-serverless-compat/Cargo.toml index 095ecfc4..4d8a9fe8 100644 --- a/crates/datadog-serverless-compat/Cargo.toml +++ b/crates/datadog-serverless-compat/Cargo.toml @@ -12,7 +12,7 @@ windows-pipes = ["datadog-trace-agent/windows-pipes", "dogstatsd/windows-pipes"] [dependencies] datadog-logs-agent = { path = "../datadog-logs-agent" } datadog-trace-agent = { path = "../datadog-trace-agent" } -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } datadog-fips = { path = "../datadog-fips", default-features = false } dogstatsd = { path = "../dogstatsd", default-features = true } reqwest = { version = "0.12.4", default-features = false } diff --git a/crates/datadog-trace-agent/Cargo.toml b/crates/datadog-trace-agent/Cargo.toml index 3c9e42ed..89bf65e2 100644 --- a/crates/datadog-trace-agent/Cargo.toml +++ b/crates/datadog-trace-agent/Cargo.toml @@ -25,16 +25,16 @@ tracing = { version = "0.1", default-features = false } serde = { version = "1.0.145", features = ["derive"] } serde_json = "1.0" thiserror = { version = "1.0.58", default-features = false } -libdd-capabilities = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } -libdd-capabilities-impl = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } -libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } -libdd-library-config = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } -libdd-trace-protobuf = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } -libdd-trace-stats = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a", features = [ +libdd-capabilities = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-capabilities-impl = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-library-config = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-trace-protobuf = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-trace-stats = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd", features = [ "mini_agent", ] } -libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a" } +libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } datadog-fips = { path = "../datadog-fips" } reqwest = { version = "0.12.23", features = [ "json", @@ -48,6 +48,6 @@ serial_test = "2.0.0" duplicate = "2.0.1" temp-env = "0.3.6" tempfile = "3.3.0" -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "27aa92cfeeca073d8730a8b4974bd3fdef7ddf3a", features = [ +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd", features = [ "test-utils", ] } diff --git a/crates/datadog-trace-agent/src/stats_concentrator_service.rs b/crates/datadog-trace-agent/src/stats_concentrator_service.rs index af052c87..3f813188 100644 --- a/crates/datadog-trace-agent/src/stats_concentrator_service.rs +++ b/crates/datadog-trace-agent/src/stats_concentrator_service.rs @@ -91,8 +91,17 @@ impl StatsConcentratorService { while let Some(command) = self.rx.recv().await { match command { ConcentratorCommand::AddChunk(chunk, metadata) => { - if self.tracer_metadata.is_none() { - self.tracer_metadata = Some(metadata); + match &self.tracer_metadata { + None => self.tracer_metadata = Some(metadata), + Some(concentrator_metadata) + if concentrator_metadata.as_ref() != metadata.as_ref() => + { + error!( + "Multiple tracers detected: stats concentrator service received metadata from a different tracer. Expected: {concentrator_metadata:?}, received: {metadata:?}." + ); + return; + } + Some(_) => {} } for span in &chunk.spans { self.concentrator.add_span(span); @@ -156,3 +165,67 @@ impl StatsConcentratorService { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::test_helpers::create_tcp_test_config; + use libdd_trace_protobuf::pb::TraceChunk; + + fn make_metadata(language: &str) -> Arc { + Arc::new(TracerMetadata { + tracer_language: language.to_string(), + ..Default::default() + }) + } + + fn empty_chunk() -> TraceChunk { + TraceChunk { + spans: vec![], + ..Default::default() + } + } + + #[tokio::test] + async fn test_stops_on_multiple_tracers() { + let config = Arc::new(create_tcp_test_config(0)); + let (service, handle) = StatsConcentratorService::new(config); + let service_handle = tokio::spawn(service.run()); + + // First tracer — sets metadata + handle + .add_chunk(empty_chunk(), make_metadata("python")) + .unwrap(); + + // Second tracer with different metadata — should stop the service + handle + .add_chunk(empty_chunk(), make_metadata("nodejs")) + .unwrap(); + + // Service task should complete promptly after detecting multiple tracers + tokio::time::timeout(std::time::Duration::from_secs(1), service_handle) + .await + .expect("service did not stop after detecting multiple tracers") + .unwrap(); + } + + #[tokio::test] + async fn test_continues_with_same_tracer() { + let config = Arc::new(create_tcp_test_config(0)); + let (service, handle) = StatsConcentratorService::new(config); + let service_handle = tokio::spawn(service.run()); + + handle + .add_chunk(empty_chunk(), make_metadata("python")) + .unwrap(); + handle + .add_chunk(empty_chunk(), make_metadata("python")) + .unwrap(); + + // Service should still be running — flush should succeed + assert!(handle.flush(false).await.is_ok()); + + drop(handle); + let _ = service_handle.await; + } +} From 497cacdbb7ac2e480bf327bb26450f82da0e85f0 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Mon, 27 Apr 2026 10:32:44 -0400 Subject: [PATCH 14/21] add comment for client_drop_p0s --- crates/datadog-trace-agent/src/mini_agent.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/datadog-trace-agent/src/mini_agent.rs b/crates/datadog-trace-agent/src/mini_agent.rs index 728e7dde..ef2660d9 100644 --- a/crates/datadog-trace-agent/src/mini_agent.rs +++ b/crates/datadog-trace-agent/src/mini_agent.rs @@ -588,6 +588,8 @@ impl MiniAgent { INFO_ENDPOINT_PATH, PROFILING_ENDPOINT_PATH ], + // client_drop_p0s tells the tracer whether it should drop unsampled p0 traces before sending them. + // In order to actually support this, sampling rates need to be sent back to the tracer. "client_drop_p0s": false, "config": config_json } From f6ed1728f5e582d47fbb7afeb9b6cc180bcca2e1 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Mon, 27 Apr 2026 10:35:28 -0400 Subject: [PATCH 15/21] add comments to clarify trace stats flushing --- crates/datadog-trace-agent/src/stats_flusher.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/datadog-trace-agent/src/stats_flusher.rs b/crates/datadog-trace-agent/src/stats_flusher.rs index ff85ac54..3320c0c6 100644 --- a/crates/datadog-trace-agent/src/stats_flusher.rs +++ b/crates/datadog-trace-agent/src/stats_flusher.rs @@ -86,6 +86,8 @@ impl StatsFlusher for ServerlessStatsFlusher { // Drain client stats in buffer and stats from concentrator on interval _ = interval.tick() => { let client_stats = std::mem::take(&mut buffer); + // Flush if trace stats are received from the tracer + // or if there is a stats concentrator for agent computed trace stats if !client_stats.is_empty() || self.stats_concentrator.is_some() { self.flush_stats(config.clone(), client_stats, false).await; } @@ -117,7 +119,7 @@ impl StatsFlusher for ServerlessStatsFlusher { send_stats_payload(&config, payload).await; } - // Flush concentrator stats + // Flush agent computed trace stats from the stats concentrator if let Some(ref concentrator) = self.stats_concentrator { match concentrator.flush(force_flush).await { Ok(Some(agent_stats)) => { From b7209e525c582505e2b2301dae5ab2f6b9bd33fa Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Mon, 27 Apr 2026 11:12:53 -0400 Subject: [PATCH 16/21] log version of unsupported tracer payload for trace stats computation --- crates/datadog-trace-agent/src/trace_processor.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/datadog-trace-agent/src/trace_processor.rs b/crates/datadog-trace-agent/src/trace_processor.rs index c64de2f3..8b66486a 100644 --- a/crates/datadog-trace-agent/src/trace_processor.rs +++ b/crates/datadog-trace-agent/src/trace_processor.rs @@ -97,7 +97,12 @@ impl ServerlessTraceProcessor { } } } else { - error!("Unsupported tracer payload version. Failed to send trace stats."); + let version = match payload { + TracerPayloadCollection::V04(_) => "V04", + TracerPayloadCollection::V05(_) => "V05", + TracerPayloadCollection::V07(_) => unreachable!(), + }; + error!("Unsupported tracer payload version {version}. Failed to send trace stats."); } } } From 92759ec5208239d52e330e4e1d8f499812d31297 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Mon, 27 Apr 2026 11:21:58 -0400 Subject: [PATCH 17/21] update libdatadog rev to 971c407d856db58baf1078bd7802abe13bac4f9f --- Cargo.lock | 46 ++++++++++----------- crates/datadog-agent-config/Cargo.toml | 4 +- crates/datadog-serverless-compat/Cargo.toml | 2 +- crates/datadog-trace-agent/Cargo.toml | 18 ++++---- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1627f9d8..1a03952d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -567,7 +567,7 @@ dependencies = [ "hyper-util", "libdd-capabilities", "libdd-capabilities-impl", - "libdd-common 3.0.2", + "libdd-common 4.0.0", "libdd-library-config", "libdd-trace-obfuscation", "libdd-trace-protobuf 3.0.1", @@ -1433,8 +1433,8 @@ checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libdd-capabilities" -version = "0.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +version = "1.0.0" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "anyhow", "bytes", @@ -1444,14 +1444,14 @@ dependencies = [ [[package]] name = "libdd-capabilities-impl" -version = "0.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +version = "1.0.0" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "bytes", "http", "http-body-util", "libdd-capabilities", - "libdd-common 3.0.2", + "libdd-common 4.0.0", ] [[package]] @@ -1487,8 +1487,8 @@ dependencies = [ [[package]] name = "libdd-common" -version = "3.0.2" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +version = "4.0.0" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "anyhow", "bytes", @@ -1561,7 +1561,7 @@ dependencies = [ [[package]] name = "libdd-ddsketch" version = "1.0.1" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "prost 0.14.3", ] @@ -1583,7 +1583,7 @@ dependencies = [ [[package]] name = "libdd-library-config" version = "1.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "anyhow", "libc", @@ -1600,12 +1600,12 @@ dependencies = [ [[package]] name = "libdd-shared-runtime" version = "0.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "async-trait", "futures", "libdd-capabilities", - "libdd-common 3.0.2", + "libdd-common 4.0.0", "tokio", "tokio-util", "tracing", @@ -1648,7 +1648,7 @@ dependencies = [ [[package]] name = "libdd-tinybytes" version = "1.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "serde", ] @@ -1666,7 +1666,7 @@ dependencies = [ [[package]] name = "libdd-trace-normalization" version = "2.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "anyhow", "libdd-trace-protobuf 3.0.1", @@ -1675,11 +1675,11 @@ dependencies = [ [[package]] name = "libdd-trace-obfuscation" version = "2.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "anyhow", "fluent-uri", - "libdd-common 3.0.2", + "libdd-common 4.0.0", "libdd-trace-protobuf 3.0.1", "libdd-trace-utils 3.0.1", "log", @@ -1703,7 +1703,7 @@ dependencies = [ [[package]] name = "libdd-trace-protobuf" version = "3.0.1" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "prost 0.14.3", "serde", @@ -1725,7 +1725,7 @@ dependencies = [ [[package]] name = "libdd-trace-stats" version = "2.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "anyhow", "async-trait", @@ -1733,8 +1733,8 @@ dependencies = [ "http", "libdd-capabilities", "libdd-capabilities-impl", - "libdd-common 3.0.2", - "libdd-ddsketch 1.0.1 (git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd)", + "libdd-common 4.0.0", + "libdd-ddsketch 1.0.1 (git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f)", "libdd-shared-runtime", "libdd-trace-protobuf 3.0.1", "libdd-trace-utils 3.0.1", @@ -1776,7 +1776,7 @@ dependencies = [ [[package]] name = "libdd-trace-utils" version = "3.0.1" -source = "git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd#e4d93e5274f176e110dbacd745e922268ac31ccd" +source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" dependencies = [ "anyhow", "base64 0.22.1", @@ -1794,8 +1794,8 @@ dependencies = [ "indexmap", "libdd-capabilities", "libdd-capabilities-impl", - "libdd-common 3.0.2", - "libdd-tinybytes 1.1.0 (git+https://github.com/DataDog/libdatadog?rev=e4d93e5274f176e110dbacd745e922268ac31ccd)", + "libdd-common 4.0.0", + "libdd-tinybytes 1.1.0 (git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f)", "libdd-trace-normalization 2.0.0", "libdd-trace-protobuf 3.0.1", "prost 0.14.3", diff --git a/crates/datadog-agent-config/Cargo.toml b/crates/datadog-agent-config/Cargo.toml index a20a49d7..f1e2b37d 100644 --- a/crates/datadog-agent-config/Cargo.toml +++ b/crates/datadog-agent-config/Cargo.toml @@ -6,8 +6,8 @@ license.workspace = true [dependencies] figment = { version = "0.10", default-features = false, features = ["yaml", "env"] } -libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } log = { version = "0.4", default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } serde-aux = { version = "4.7", default-features = false } diff --git a/crates/datadog-serverless-compat/Cargo.toml b/crates/datadog-serverless-compat/Cargo.toml index 4d8a9fe8..7aadae7c 100644 --- a/crates/datadog-serverless-compat/Cargo.toml +++ b/crates/datadog-serverless-compat/Cargo.toml @@ -12,7 +12,7 @@ windows-pipes = ["datadog-trace-agent/windows-pipes", "dogstatsd/windows-pipes"] [dependencies] datadog-logs-agent = { path = "../datadog-logs-agent" } datadog-trace-agent = { path = "../datadog-trace-agent" } -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } datadog-fips = { path = "../datadog-fips", default-features = false } dogstatsd = { path = "../dogstatsd", default-features = true } reqwest = { version = "0.12.4", default-features = false } diff --git a/crates/datadog-trace-agent/Cargo.toml b/crates/datadog-trace-agent/Cargo.toml index 89bf65e2..bc1dc3ef 100644 --- a/crates/datadog-trace-agent/Cargo.toml +++ b/crates/datadog-trace-agent/Cargo.toml @@ -25,16 +25,16 @@ tracing = { version = "0.1", default-features = false } serde = { version = "1.0.145", features = ["derive"] } serde_json = "1.0" thiserror = { version = "1.0.58", default-features = false } -libdd-capabilities = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } -libdd-capabilities-impl = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } -libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } -libdd-library-config = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } -libdd-trace-protobuf = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } -libdd-trace-stats = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd", features = [ +libdd-capabilities = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-capabilities-impl = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-library-config = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-trace-protobuf = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-trace-stats = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f", features = [ "mini_agent", ] } -libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd" } +libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } datadog-fips = { path = "../datadog-fips" } reqwest = { version = "0.12.23", features = [ "json", @@ -48,6 +48,6 @@ serial_test = "2.0.0" duplicate = "2.0.1" temp-env = "0.3.6" tempfile = "3.3.0" -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "e4d93e5274f176e110dbacd745e922268ac31ccd", features = [ +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f", features = [ "test-utils", ] } From 0b0d5c28b79353237df6d7a1c6031fbef3f12625 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Tue, 28 Apr 2026 10:59:50 -0400 Subject: [PATCH 18/21] use service from spans, set default env, use multiple stats concentrators --- Cargo.lock | 28 +-- crates/datadog-agent-config/Cargo.toml | 4 +- crates/datadog-serverless-compat/Cargo.toml | 2 +- crates/datadog-trace-agent/Cargo.toml | 18 +- crates/datadog-trace-agent/src/config.rs | 5 +- .../src/stats_concentrator_service.rs | 185 +++++++++--------- .../datadog-trace-agent/src/stats_flusher.rs | 6 +- .../src/trace_processor.rs | 18 +- 8 files changed, 142 insertions(+), 124 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a03952d..5255a8de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1434,7 +1434,7 @@ checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libdd-capabilities" version = "1.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "anyhow", "bytes", @@ -1445,7 +1445,7 @@ dependencies = [ [[package]] name = "libdd-capabilities-impl" version = "1.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "bytes", "http", @@ -1488,7 +1488,7 @@ dependencies = [ [[package]] name = "libdd-common" version = "4.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "anyhow", "bytes", @@ -1561,7 +1561,7 @@ dependencies = [ [[package]] name = "libdd-ddsketch" version = "1.0.1" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "prost 0.14.3", ] @@ -1583,7 +1583,7 @@ dependencies = [ [[package]] name = "libdd-library-config" version = "1.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "anyhow", "libc", @@ -1600,7 +1600,7 @@ dependencies = [ [[package]] name = "libdd-shared-runtime" version = "0.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "async-trait", "futures", @@ -1648,7 +1648,7 @@ dependencies = [ [[package]] name = "libdd-tinybytes" version = "1.1.0" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "serde", ] @@ -1666,7 +1666,7 @@ dependencies = [ [[package]] name = "libdd-trace-normalization" version = "2.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "anyhow", "libdd-trace-protobuf 3.0.1", @@ -1675,7 +1675,7 @@ dependencies = [ [[package]] name = "libdd-trace-obfuscation" version = "2.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "anyhow", "fluent-uri", @@ -1703,7 +1703,7 @@ dependencies = [ [[package]] name = "libdd-trace-protobuf" version = "3.0.1" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "prost 0.14.3", "serde", @@ -1725,7 +1725,7 @@ dependencies = [ [[package]] name = "libdd-trace-stats" version = "2.0.0" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "anyhow", "async-trait", @@ -1734,7 +1734,7 @@ dependencies = [ "libdd-capabilities", "libdd-capabilities-impl", "libdd-common 4.0.0", - "libdd-ddsketch 1.0.1 (git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f)", + "libdd-ddsketch 1.0.1 (git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3)", "libdd-shared-runtime", "libdd-trace-protobuf 3.0.1", "libdd-trace-utils 3.0.1", @@ -1776,7 +1776,7 @@ dependencies = [ [[package]] name = "libdd-trace-utils" version = "3.0.1" -source = "git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f#971c407d856db58baf1078bd7802abe13bac4f9f" +source = "git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3#dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" dependencies = [ "anyhow", "base64 0.22.1", @@ -1795,7 +1795,7 @@ dependencies = [ "libdd-capabilities", "libdd-capabilities-impl", "libdd-common 4.0.0", - "libdd-tinybytes 1.1.0 (git+https://github.com/DataDog/libdatadog?rev=971c407d856db58baf1078bd7802abe13bac4f9f)", + "libdd-tinybytes 1.1.0 (git+https://github.com/DataDog/libdatadog?rev=dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3)", "libdd-trace-normalization 2.0.0", "libdd-trace-protobuf 3.0.1", "prost 0.14.3", diff --git a/crates/datadog-agent-config/Cargo.toml b/crates/datadog-agent-config/Cargo.toml index f1e2b37d..ebb69ad3 100644 --- a/crates/datadog-agent-config/Cargo.toml +++ b/crates/datadog-agent-config/Cargo.toml @@ -6,8 +6,8 @@ license.workspace = true [dependencies] figment = { version = "0.10", default-features = false, features = ["yaml", "env"] } -libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } log = { version = "0.4", default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } serde-aux = { version = "4.7", default-features = false } diff --git a/crates/datadog-serverless-compat/Cargo.toml b/crates/datadog-serverless-compat/Cargo.toml index 7aadae7c..4dc7fcc6 100644 --- a/crates/datadog-serverless-compat/Cargo.toml +++ b/crates/datadog-serverless-compat/Cargo.toml @@ -12,7 +12,7 @@ windows-pipes = ["datadog-trace-agent/windows-pipes", "dogstatsd/windows-pipes"] [dependencies] datadog-logs-agent = { path = "../datadog-logs-agent" } datadog-trace-agent = { path = "../datadog-trace-agent" } -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } datadog-fips = { path = "../datadog-fips", default-features = false } dogstatsd = { path = "../dogstatsd", default-features = true } reqwest = { version = "0.12.4", default-features = false } diff --git a/crates/datadog-trace-agent/Cargo.toml b/crates/datadog-trace-agent/Cargo.toml index bc1dc3ef..22054334 100644 --- a/crates/datadog-trace-agent/Cargo.toml +++ b/crates/datadog-trace-agent/Cargo.toml @@ -25,16 +25,16 @@ tracing = { version = "0.1", default-features = false } serde = { version = "1.0.145", features = ["derive"] } serde_json = "1.0" thiserror = { version = "1.0.58", default-features = false } -libdd-capabilities = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } -libdd-capabilities-impl = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } -libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } -libdd-library-config = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } -libdd-trace-protobuf = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } -libdd-trace-stats = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f", features = [ +libdd-capabilities = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } +libdd-capabilities-impl = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } +libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } +libdd-library-config = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } +libdd-trace-protobuf = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } +libdd-trace-stats = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3", features = [ "mini_agent", ] } -libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f" } +libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3" } datadog-fips = { path = "../datadog-fips" } reqwest = { version = "0.12.23", features = [ "json", @@ -48,6 +48,6 @@ serial_test = "2.0.0" duplicate = "2.0.1" temp-env = "0.3.6" tempfile = "3.3.0" -libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "971c407d856db58baf1078bd7802abe13bac4f9f", features = [ +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "dbd3ecb74cdad68d1efe3f0024cc3551f502a4c3", features = [ "test-utils", ] } diff --git a/crates/datadog-trace-agent/src/config.rs b/crates/datadog-trace-agent/src/config.rs index b1d38893..b929fe31 100644 --- a/crates/datadog-trace-agent/src/config.rs +++ b/crates/datadog-trace-agent/src/config.rs @@ -109,7 +109,6 @@ pub struct Config { /// timeout for environment verification, in milliseconds pub verify_env_timeout_ms: u64, pub proxy_url: Option, - pub service: Option, pub env: Option, } @@ -253,8 +252,7 @@ impl Config { .or_else(|_| env::var("HTTPS_PROXY")) .ok(), tags, - service: env::var("DD_SERVICE").ok(), - env: env::var("DD_ENV").ok(), + env: env::var("DD_ENV").ok().filter(|v| v != "none"), }) } } @@ -729,7 +727,6 @@ pub mod test_helpers { proxy_request_retry_backoff_base_ms: 100, verify_env_timeout_ms: 1000, proxy_url: None, - service: None, env: None, } } diff --git a/crates/datadog-trace-agent/src/stats_concentrator_service.rs b/crates/datadog-trace-agent/src/stats_concentrator_service.rs index 3f813188..88f9b1ad 100644 --- a/crates/datadog-trace-agent/src/stats_concentrator_service.rs +++ b/crates/datadog-trace-agent/src/stats_concentrator_service.rs @@ -1,6 +1,7 @@ // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use std::collections::HashMap; use std::sync::Arc; use tokio::sync::{mpsc, oneshot}; @@ -24,7 +25,7 @@ pub enum StatsError { pub enum ConcentratorCommand { AddChunk(Box, Arc), - Flush(bool, oneshot::Sender>), + Flush(bool, oneshot::Sender>), } /// A cloneable handle to the stats concentrator service, safe to share across async tasks. @@ -50,7 +51,7 @@ impl StatsConcentratorHandle { .map_err(|e| StatsError::SendError(Box::new(e))) } - pub async fn flush(&self, force_flush: bool) -> Result, StatsError> { + pub async fn flush(&self, force_flush: bool) -> Result, StatsError> { let (response_tx, response_rx) = oneshot::channel(); self.tx .send(ConcentratorCommand::Flush(force_flush, response_tx)) @@ -59,10 +60,20 @@ impl StatsConcentratorHandle { } } +fn new_concentrator() -> SpanConcentrator { + // TODO: set span_kinds_stats_computed and peer_tag_keys + SpanConcentrator::new( + Duration::from_nanos(BUCKET_DURATION_NS), + SystemTime::now(), + vec![], + vec![], + ) +} + pub struct StatsConcentratorService { - concentrator: SpanConcentrator, + /// One concentrator per unique TracerMetadata. + concentrators: HashMap, SpanConcentrator>, rx: mpsc::UnboundedReceiver, - tracer_metadata: Option>, config: Arc, } @@ -71,17 +82,9 @@ impl StatsConcentratorService { pub fn new(config: Arc) -> (Self, StatsConcentratorHandle) { let (tx, rx) = mpsc::unbounded_channel(); let handle = StatsConcentratorHandle::new(tx); - // TODO: set span_kinds_stats_computed and peer_tag_keys - let concentrator = SpanConcentrator::new( - Duration::from_nanos(BUCKET_DURATION_NS), - SystemTime::now(), - vec![], - vec![], - ); let service = Self { - concentrator, + concentrators: HashMap::new(), rx, - tracer_metadata: None, config, }; (service, handle) @@ -91,20 +94,13 @@ impl StatsConcentratorService { while let Some(command) = self.rx.recv().await { match command { ConcentratorCommand::AddChunk(chunk, metadata) => { - match &self.tracer_metadata { - None => self.tracer_metadata = Some(metadata), - Some(concentrator_metadata) - if concentrator_metadata.as_ref() != metadata.as_ref() => - { - error!( - "Multiple tracers detected: stats concentrator service received metadata from a different tracer. Expected: {concentrator_metadata:?}, received: {metadata:?}." - ); - return; - } - Some(_) => {} - } + let concentrator = self + .concentrators + .entry(Arc::clone(&metadata)) + .or_insert_with(new_concentrator); + for span in &chunk.spans { - self.concentrator.add_span(span); + concentrator.add_span(span); } } ConcentratorCommand::Flush(force_flush, response_tx) => { @@ -117,50 +113,52 @@ impl StatsConcentratorService { fn handle_flush( &mut self, force_flush: bool, - response_tx: oneshot::Sender>, + response_tx: oneshot::Sender>, ) { - let stats_buckets = self.concentrator.flush(SystemTime::now(), force_flush); - let stats = if stats_buckets.is_empty() { - None - } else { - let default_metadata = TracerMetadata::default(); - let metadata = self.tracer_metadata.as_deref().unwrap_or(&default_metadata); - Some(ClientStatsPayload { - // Do not set hostname so the trace stats backend can aggregate stats properly - hostname: String::new(), - // Prefer env from the tracer payload, fall back to agent config - env: metadata - .service_env - .clone() - .filter(|s| !s.is_empty()) - .or_else(|| self.config.env.clone()) - .unwrap_or_default(), - version: metadata.service_version.clone().unwrap_or_default(), - lang: metadata.tracer_language.clone(), - tracer_version: metadata.tracer_version.clone(), - // Not set for agent-computed stats; runtime_id identifies tracer-computed payloads - runtime_id: String::new(), - // Not supported yet - sequence: 0, - // Not supported yet - agent_aggregation: String::new(), - // One service per app for serverless - service: self.config.service.clone().unwrap_or_default(), - container_id: metadata.container_id.clone().unwrap_or_default(), - // Not supported yet - tags: vec![], - // Not supported yet - git_commit_sha: String::new(), - // Not supported yet - image_tag: String::new(), - stats: stats_buckets, - // Not supported yet - process_tags: String::new(), - // Not supported yet - process_tags_hash: 0, + let payloads = self + .concentrators + .iter_mut() + .filter_map(|(metadata, concentrator)| { + let stats_buckets = concentrator.flush(SystemTime::now(), force_flush); + if stats_buckets.is_empty() { + return None; + } + Some(ClientStatsPayload { + // Do not set hostname so the trace stats backend can aggregate stats properly + hostname: String::new(), + // Prefer env from the tracer payload, fall back to agent config + env: metadata + .service_env + .clone() + .filter(|s| !s.is_empty()) + .or_else(|| self.config.env.clone()) + .unwrap_or_default(), + version: metadata.service_version.clone().unwrap_or_default(), + lang: metadata.tracer_language.clone(), + tracer_version: metadata.tracer_version.clone(), + // Not set for agent-computed stats; runtime_id identifies tracer-computed payloads + runtime_id: String::new(), + // Not supported yet + sequence: 0, + // Not supported yet + agent_aggregation: String::new(), + service: metadata.service_name.clone().unwrap_or_default(), + container_id: metadata.container_id.clone().unwrap_or_default(), + // Not supported yet + tags: vec![], + // Not supported yet + git_commit_sha: String::new(), + // Not supported yet + image_tag: String::new(), + stats: stats_buckets, + // Not supported yet + process_tags: String::new(), + // Not supported yet + process_tags_hash: 0, + }) }) - }; - if let Err(e) = response_tx.send(stats) { + .collect(); + if let Err(e) = response_tx.send(payloads) { error!("Failed to return trace stats: {e:?}"); } } @@ -172,9 +170,11 @@ mod tests { use crate::config::test_helpers::create_tcp_test_config; use libdd_trace_protobuf::pb::TraceChunk; - fn make_metadata(language: &str) -> Arc { + fn make_metadata(language: &str, service: &str, version: &str) -> Arc { Arc::new(TracerMetadata { tracer_language: language.to_string(), + service_name: Some(service.to_string()), + service_version: Some(version.to_string()), ..Default::default() }) } @@ -187,45 +187,52 @@ mod tests { } #[tokio::test] - async fn test_stops_on_multiple_tracers() { + async fn test_unique_metadata_gets_separate_concentrators() { let config = Arc::new(create_tcp_test_config(0)); let (service, handle) = StatsConcentratorService::new(config); - let service_handle = tokio::spawn(service.run()); + tokio::spawn(service.run()); - // First tracer — sets metadata handle - .add_chunk(empty_chunk(), make_metadata("python")) + .add_chunk( + empty_chunk(), + make_metadata("python", "my-service", "1.0.0"), + ) .unwrap(); - - // Second tracer with different metadata — should stop the service handle - .add_chunk(empty_chunk(), make_metadata("nodejs")) + .add_chunk( + empty_chunk(), + make_metadata("python", "my-service", "2.0.0"), + ) .unwrap(); - - // Service task should complete promptly after detecting multiple tracers - tokio::time::timeout(std::time::Duration::from_secs(1), service_handle) - .await - .expect("service did not stop after detecting multiple tracers") + handle + .add_chunk( + empty_chunk(), + make_metadata("nodejs", "my-service", "1.0.0"), + ) .unwrap(); + + assert!(handle.flush(false).await.is_ok()); } #[tokio::test] - async fn test_continues_with_same_tracer() { + async fn test_continues_with_same_metadata() { let config = Arc::new(create_tcp_test_config(0)); let (service, handle) = StatsConcentratorService::new(config); - let service_handle = tokio::spawn(service.run()); + tokio::spawn(service.run()); handle - .add_chunk(empty_chunk(), make_metadata("python")) + .add_chunk( + empty_chunk(), + make_metadata("python", "my-service", "1.0.0"), + ) .unwrap(); handle - .add_chunk(empty_chunk(), make_metadata("python")) + .add_chunk( + empty_chunk(), + make_metadata("python", "my-service", "1.0.0"), + ) .unwrap(); - // Service should still be running — flush should succeed assert!(handle.flush(false).await.is_ok()); - - drop(handle); - let _ = service_handle.await; } } diff --git a/crates/datadog-trace-agent/src/stats_flusher.rs b/crates/datadog-trace-agent/src/stats_flusher.rs index 3320c0c6..27ba3871 100644 --- a/crates/datadog-trace-agent/src/stats_flusher.rs +++ b/crates/datadog-trace-agent/src/stats_flusher.rs @@ -122,12 +122,12 @@ impl StatsFlusher for ServerlessStatsFlusher { // Flush agent computed trace stats from the stats concentrator if let Some(ref concentrator) = self.stats_concentrator { match concentrator.flush(force_flush).await { - Ok(Some(agent_stats)) => { - let mut payload = stats_utils::construct_stats_payload(vec![agent_stats]); + Ok(agent_stats) if !agent_stats.is_empty() => { + let mut payload = stats_utils::construct_stats_payload(agent_stats); payload.client_computed = false; send_stats_payload(&config, payload).await; } - Ok(None) => {} + Ok(_) => {} Err(e) => error!("Failed to flush concentrator stats: {e}"), } } diff --git a/crates/datadog-trace-agent/src/trace_processor.rs b/crates/datadog-trace-agent/src/trace_processor.rs index 8b66486a..57f2ed20 100644 --- a/crates/datadog-trace-agent/src/trace_processor.rs +++ b/crates/datadog-trace-agent/src/trace_processor.rs @@ -78,13 +78,27 @@ impl ServerlessTraceProcessor { ) { if let TracerPayloadCollection::V07(tracer_payloads) = payload { for tracer_payload in tracer_payloads { + // Fetch service from the `_dd.base_service` attribute on the root span + let service_name = tracer_payload + .chunks + .iter() + .flat_map(|c| c.spans.iter()) + .find(|s| s.parent_id == 0) + .map(|s| { + s.meta + .get("_dd.base_service") + .filter(|v| !v.is_empty()) + .cloned() + .unwrap_or_else(|| s.service.clone()) + }) + .filter(|s| !s.is_empty()); let metadata = Arc::new(TracerMetadata { schema_version: 2, runtime_id: None, tracer_language: tracer_payload.language_name.clone(), tracer_version: tracer_payload.tracer_version.clone(), hostname: String::new(), - service_name: None, + service_name, service_env: Some(tracer_payload.env.clone()), service_version: Some(tracer_payload.app_version.clone()), process_tags: None, @@ -179,6 +193,7 @@ impl TraceProcessor for ServerlessTraceProcessor { } } + // Skip agent side stats computation if the tracer has already computed stats if let Some(ref concentrator) = self.stats_concentrator && !tracer_header_tags.client_computed_stats { @@ -265,7 +280,6 @@ mod tests { ..Default::default() }, tags: Tags::from_env_string("env:test,service:my-service"), - service: Some("test-service".to_string()), env: Some("test-env".to_string()), } } From 66c4b0bf86025eafe80d22635bbba8df33ce926c Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Tue, 28 Apr 2026 11:04:49 -0400 Subject: [PATCH 19/21] fix env handling --- crates/datadog-trace-agent/src/config.rs | 6 +++--- .../datadog-trace-agent/src/stats_concentrator_service.rs | 3 +-- crates/datadog-trace-agent/src/trace_processor.rs | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/datadog-trace-agent/src/config.rs b/crates/datadog-trace-agent/src/config.rs index b929fe31..14f90ed7 100644 --- a/crates/datadog-trace-agent/src/config.rs +++ b/crates/datadog-trace-agent/src/config.rs @@ -109,7 +109,7 @@ pub struct Config { /// timeout for environment verification, in milliseconds pub verify_env_timeout_ms: u64, pub proxy_url: Option, - pub env: Option, + pub env: String, } impl Config { @@ -252,7 +252,7 @@ impl Config { .or_else(|_| env::var("HTTPS_PROXY")) .ok(), tags, - env: env::var("DD_ENV").ok().filter(|v| v != "none"), + env: env::var("DD_ENV").unwrap_or_else(|_| "none".to_string()), }) } } @@ -727,7 +727,7 @@ pub mod test_helpers { proxy_request_retry_backoff_base_ms: 100, verify_env_timeout_ms: 1000, proxy_url: None, - env: None, + env: "none".to_string(), } } } diff --git a/crates/datadog-trace-agent/src/stats_concentrator_service.rs b/crates/datadog-trace-agent/src/stats_concentrator_service.rs index 88f9b1ad..826e87d4 100644 --- a/crates/datadog-trace-agent/src/stats_concentrator_service.rs +++ b/crates/datadog-trace-agent/src/stats_concentrator_service.rs @@ -131,8 +131,7 @@ impl StatsConcentratorService { .service_env .clone() .filter(|s| !s.is_empty()) - .or_else(|| self.config.env.clone()) - .unwrap_or_default(), + .unwrap_or_else(|| self.config.env.clone()), version: metadata.service_version.clone().unwrap_or_default(), lang: metadata.tracer_language.clone(), tracer_version: metadata.tracer_version.clone(), diff --git a/crates/datadog-trace-agent/src/trace_processor.rs b/crates/datadog-trace-agent/src/trace_processor.rs index 57f2ed20..ff019b64 100644 --- a/crates/datadog-trace-agent/src/trace_processor.rs +++ b/crates/datadog-trace-agent/src/trace_processor.rs @@ -280,7 +280,7 @@ mod tests { ..Default::default() }, tags: Tags::from_env_string("env:test,service:my-service"), - env: Some("test-env".to_string()), + env: "test-env".to_string(), } } From 023674f4720646cf4702d9855917997bf5a5ea49 Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Tue, 28 Apr 2026 11:12:11 -0400 Subject: [PATCH 20/21] add comments --- crates/datadog-trace-agent/src/stats_concentrator_service.rs | 5 +++++ crates/datadog-trace-agent/src/trace_processor.rs | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/datadog-trace-agent/src/stats_concentrator_service.rs b/crates/datadog-trace-agent/src/stats_concentrator_service.rs index 826e87d4..e0330719 100644 --- a/crates/datadog-trace-agent/src/stats_concentrator_service.rs +++ b/crates/datadog-trace-agent/src/stats_concentrator_service.rs @@ -94,6 +94,11 @@ impl StatsConcentratorService { while let Some(command) = self.rx.recv().await { match command { ConcentratorCommand::AddChunk(chunk, metadata) => { + // A single tracer may produce payloads with different metadata depending on the + // integration. For example, the .NET process integration appends `-command` to + // the base service and omits the version. A separate concentrator is kept per + // unique metadata so that each payload's stats are flushed with the metadata + // from the originating payload. let concentrator = self .concentrators .entry(Arc::clone(&metadata)) diff --git a/crates/datadog-trace-agent/src/trace_processor.rs b/crates/datadog-trace-agent/src/trace_processor.rs index ff019b64..bc6f1fef 100644 --- a/crates/datadog-trace-agent/src/trace_processor.rs +++ b/crates/datadog-trace-agent/src/trace_processor.rs @@ -78,7 +78,8 @@ impl ServerlessTraceProcessor { ) { if let TracerPayloadCollection::V07(tracer_payloads) = payload { for tracer_payload in tracer_payloads { - // Fetch service from the `_dd.base_service` attribute on the root span + // Fetch service from the `_dd.base_service` attribute on the root span, + // falling back to `span.service` if not set let service_name = tracer_payload .chunks .iter() From 9894ef73f3f34617afa61206cc98a0c750e8b63f Mon Sep 17 00:00:00 2001 From: Duncan Harvey Date: Tue, 28 Apr 2026 11:18:03 -0400 Subject: [PATCH 21/21] only use base service for stats concentrator --- crates/datadog-trace-agent/src/trace_processor.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/crates/datadog-trace-agent/src/trace_processor.rs b/crates/datadog-trace-agent/src/trace_processor.rs index bc6f1fef..ff6bf790 100644 --- a/crates/datadog-trace-agent/src/trace_processor.rs +++ b/crates/datadog-trace-agent/src/trace_processor.rs @@ -78,21 +78,15 @@ impl ServerlessTraceProcessor { ) { if let TracerPayloadCollection::V07(tracer_payloads) = payload { for tracer_payload in tracer_payloads { - // Fetch service from the `_dd.base_service` attribute on the root span, - // falling back to `span.service` if not set + // Fetch service from the `_dd.base_service` attribute on the root span let service_name = tracer_payload .chunks .iter() .flat_map(|c| c.spans.iter()) .find(|s| s.parent_id == 0) - .map(|s| { - s.meta - .get("_dd.base_service") - .filter(|v| !v.is_empty()) - .cloned() - .unwrap_or_else(|| s.service.clone()) - }) - .filter(|s| !s.is_empty()); + .and_then(|s| s.meta.get("_dd.base_service")) + .filter(|v| !v.is_empty()) + .cloned(); let metadata = Arc::new(TracerMetadata { schema_version: 2, runtime_id: None,