From 92a005472116c53426dbf955d708dc0088ea64c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Mon, 26 Aug 2024 17:18:14 -0400 Subject: [PATCH 01/41] accept `datadog_wrapper` --- bottlecap/src/config/mod.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index c2b2655db..3785843bb 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -125,13 +125,6 @@ fn failsover(figment: &Figment) -> Result<(), ConfigError> { )); } - let datadog_wrapper_set = - std::env::var("AWS_LAMBDA_EXEC_WRAPPER").unwrap_or_default() == "/opt/datadog_wrapper"; - if datadog_wrapper_set { - log_failover_reason("datadog_wrapper"); - return Err(ConfigError::UnsupportedField("datadog_wrapper".to_string())); - } - if failover_config.serverless_appsec_enabled || failover_config.appsec_enabled { log_failover_reason("appsec_enabled"); return Err(ConfigError::UnsupportedField("appsec_enabled".to_string())); @@ -224,22 +217,6 @@ pub mod tests { }); } - #[test] - fn test_reject_datadog_wrapper() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_EXTENSION_VERSION", "next"); - jail.set_env("AWS_LAMBDA_EXEC_WRAPPER", "/opt/datadog_wrapper"); - - let config = get_config(Path::new("")).expect_err("should reject unknown fields"); - assert_eq!( - config, - ConfigError::UnsupportedField("datadog_wrapper".to_string()) - ); - Ok(()) - }); - } - #[test] fn test_allowed_but_disabled() { figment::Jail::expect_with(|jail| { From 7806c3bdcf17f02ff60f46e8842d0f2b974cb524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Mon, 26 Aug 2024 17:19:00 -0400 Subject: [PATCH 02/41] Revert "accept `datadog_wrapper`" This reverts commit 9560657582f2f22c8e68af5d0bb9d7d2b0765650. 
--- bottlecap/src/config/mod.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 3785843bb..c2b2655db 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -125,6 +125,13 @@ fn failsover(figment: &Figment) -> Result<(), ConfigError> { )); } + let datadog_wrapper_set = + std::env::var("AWS_LAMBDA_EXEC_WRAPPER").unwrap_or_default() == "/opt/datadog_wrapper"; + if datadog_wrapper_set { + log_failover_reason("datadog_wrapper"); + return Err(ConfigError::UnsupportedField("datadog_wrapper".to_string())); + } + if failover_config.serverless_appsec_enabled || failover_config.appsec_enabled { log_failover_reason("appsec_enabled"); return Err(ConfigError::UnsupportedField("appsec_enabled".to_string())); @@ -217,6 +224,22 @@ pub mod tests { }); } + #[test] + fn test_reject_datadog_wrapper() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_EXTENSION_VERSION", "next"); + jail.set_env("AWS_LAMBDA_EXEC_WRAPPER", "/opt/datadog_wrapper"); + + let config = get_config(Path::new("")).expect_err("should reject unknown fields"); + assert_eq!( + config, + ConfigError::UnsupportedField("datadog_wrapper".to_string()) + ); + Ok(()) + }); + } + #[test] fn test_allowed_but_disabled() { figment::Jail::expect_with(|jail| { From fb11d0aaa2c302c44789eb18da103dd1a73f9e9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Tue, 27 Aug 2024 10:42:08 -0400 Subject: [PATCH 03/41] accept `datadog_wrapper` (#373) --- bottlecap/src/config/mod.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index c2b2655db..3785843bb 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -125,13 +125,6 @@ fn failsover(figment: &Figment) -> Result<(), ConfigError> { )); } - let datadog_wrapper_set = - 
std::env::var("AWS_LAMBDA_EXEC_WRAPPER").unwrap_or_default() == "/opt/datadog_wrapper"; - if datadog_wrapper_set { - log_failover_reason("datadog_wrapper"); - return Err(ConfigError::UnsupportedField("datadog_wrapper".to_string())); - } - if failover_config.serverless_appsec_enabled || failover_config.appsec_enabled { log_failover_reason("appsec_enabled"); return Err(ConfigError::UnsupportedField("appsec_enabled".to_string())); @@ -224,22 +217,6 @@ pub mod tests { }); } - #[test] - fn test_reject_datadog_wrapper() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_EXTENSION_VERSION", "next"); - jail.set_env("AWS_LAMBDA_EXEC_WRAPPER", "/opt/datadog_wrapper"); - - let config = get_config(Path::new("")).expect_err("should reject unknown fields"); - assert_eq!( - config, - ConfigError::UnsupportedField("datadog_wrapper".to_string()) - ); - Ok(()) - }); - } - #[test] fn test_allowed_but_disabled() { figment::Jail::expect_with(|jail| { From 8853b21817d5b5042d61f37e607518383325a9f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Tue, 27 Aug 2024 10:55:02 -0400 Subject: [PATCH 04/41] feat(bottlecap): add base for universal instrumentation (#367) * remove `hello_agent.rs` in favor of a later agent * create the `LifecycleListener` agent in charge of listening to lambda-library/tracer events, moved the `hello_agent` handler here * fmt --- bottlecap/src/bin/bottlecap/main.rs | 10 ++-- bottlecap/src/lifecycle/listener.rs | 79 +++++++++++++++++++++++++++++ bottlecap/src/lifecycle/mod.rs | 1 + bottlecap/src/traces/hello_agent.rs | 51 ------------------- bottlecap/src/traces/mod.rs | 1 - 5 files changed, 87 insertions(+), 55 deletions(-) create mode 100644 bottlecap/src/lifecycle/listener.rs delete mode 100644 bottlecap/src/traces/hello_agent.rs diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index c564d1a8a..7b5992bce 100644 --- 
a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -17,6 +17,7 @@ use bottlecap::{ lifecycle::{ flush_control::FlushControl, invocation_context::{InvocationContext, InvocationContextBuffer}, + listener::Listener as LifecycleListener, }, logger, logs::{ @@ -33,7 +34,6 @@ use bottlecap::{ listener::TelemetryListener, }, traces::{ - hello_agent, stats_flusher::{self, StatsFlusher}, stats_processor, trace_agent, trace_flusher::{self, TraceFlusher}, @@ -318,7 +318,7 @@ async fn extension_loop_active( trace_flusher: trace_flusher_clone, stats_processor, stats_flusher: stats_flusher_clone, - tags_provider, + tags_provider: Arc::clone(&tags_provider), }); tokio::spawn(async move { let res = trace_agent.start_trace_agent().await; @@ -326,9 +326,13 @@ async fn extension_loop_active( error!("Error starting trace agent: {e:?}"); } }); + + let lifecycle_listener = LifecycleListener { + tags_provider: Arc::clone(&tags_provider), + }; // TODO(astuyve): deprioritize this task after the first request tokio::spawn(async move { - let res = hello_agent::start_handler().await; + let res = lifecycle_listener.start().await; if let Err(e) = res { error!("Error starting hello agent: {e:?}"); } diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs new file mode 100644 index 000000000..0afdab41c --- /dev/null +++ b/bottlecap/src/lifecycle/listener.rs @@ -0,0 +1,79 @@ +// Copyright 2024-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use std::convert::Infallible; +use std::net::SocketAddr; +use std::sync::Arc; + +use hyper::service::{make_service_fn, service_fn}; +use hyper::{http, Body, Method, Request, Response, StatusCode}; +use serde_json::json; +use tracing::{error, warn}; + +use crate::tags::provider; + +const HELLO_PATH: &str = "/lambda/hello"; +const START_INVOCATION_PATH: &str = "/lambda/start-invocation"; +const END_INVOCATION_PATH: &str = "/lambda/end-invocation"; +const AGENT_PORT: usize = 8124; + +pub struct Listener { + pub tags_provider: Arc, +} + +impl Listener { + pub async fn start(&self) -> Result<(), Box> { + let make_svc = make_service_fn(move |_| { + let service = service_fn(Self::handler); + + async move { Ok::<_, Infallible>(service) } + }); + + let port = u16::try_from(AGENT_PORT).expect("AGENT_PORT is too large"); + let addr = SocketAddr::from(([127, 0, 0, 1], port)); + let server_builder = hyper::Server::try_bind(&addr)?; + + let server = server_builder.serve(make_svc); + + // start hyper http server + if let Err(e) = server.await { + error!("Failed to start the Lifecycle Listener {e}"); + return Err(e.into()); + } + + Ok(()) + } + + #[allow(clippy::unused_async)] + async fn handler(req: Request) -> http::Result> { + match (req.method(), req.uri().path()) { + (&Method::POST, START_INVOCATION_PATH) => Self::start_invocation_handler(req), + (&Method::POST, END_INVOCATION_PATH) => Self::end_invocation_handler(req), + (&Method::GET, HELLO_PATH) => Self::hello_handler(), + _ => { + let mut not_found = Response::default(); + *not_found.status_mut() = StatusCode::NOT_FOUND; + Ok(not_found) + } + } + } + + fn start_invocation_handler(_: Request) -> http::Result> { + Response::builder() + .status(200) + .body(Body::from(json!({}).to_string())) + } + + fn end_invocation_handler(_: Request) -> http::Result> { + Response::builder() + .status(200) + .body(Body::from(json!({}).to_string())) + } + + fn 
hello_handler() -> http::Result> { + warn!("[DEPRECATED] Please upgrade your tracing library, the /hello route is deprecated"); + Response::builder() + .status(200) + .body(Body::from(json!({}).to_string())) + } +} diff --git a/bottlecap/src/lifecycle/mod.rs b/bottlecap/src/lifecycle/mod.rs index 1c0924d84..a35e20622 100644 --- a/bottlecap/src/lifecycle/mod.rs +++ b/bottlecap/src/lifecycle/mod.rs @@ -1,2 +1,3 @@ pub mod flush_control; pub mod invocation_context; +pub mod listener; diff --git a/bottlecap/src/traces/hello_agent.rs b/bottlecap/src/traces/hello_agent.rs deleted file mode 100644 index c3584ccf3..000000000 --- a/bottlecap/src/traces/hello_agent.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -// TODO(Astuyve): Deprecate. -// older clients require the 127.0.0.1:8126/lambda/hello route -// to identify the presence of the extension. - -use hyper::service::{make_service_fn, service_fn}; -use hyper::{http, Body, Method, Request, Response, Server, StatusCode}; -use serde_json::json; -use std::convert::Infallible; -use std::net::SocketAddr; -use tracing::{error, warn}; - -const HELLO_PATH: &str = "/lambda/hello"; -const AGENT_PORT: usize = 8124; - -pub async fn start_handler() -> Result<(), Box> { - let make_svc = make_service_fn(move |_| { - let service = service_fn(hello_handler); - - async move { Ok::<_, Infallible>(service) } - }); - - let port = u16::try_from(AGENT_PORT).expect("AGENT_PORT is too large"); - let addr = SocketAddr::from(([127, 0, 0, 1], port)); - let server_builder = Server::try_bind(&addr)?; - - let server = server_builder.serve(make_svc); - - // start hyper http server - if let Err(e) = server.await { - error!("Server error: {e}"); - return Err(e.into()); - } - - Ok(()) -} - -async fn hello_handler(req: Request) -> http::Result> { - if let (&Method::GET, HELLO_PATH) = (req.method(), req.uri().path()) { - warn!("[DEPRECATED] Please upgrade your 
tracing library, the /hello route is deprecated"); - Response::builder() - .status(200) - .body(Body::from(json!({}).to_string())) - } else { - let mut not_found = Response::default(); - *not_found.status_mut() = StatusCode::NOT_FOUND; - Ok(not_found) - } -} diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index b70d26a83..8545fbe40 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -1,7 +1,6 @@ // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -pub mod hello_agent; pub mod stats_flusher; pub mod stats_processor; pub mod trace_agent; From 78f5a6401d1b8632b385948fcdb9e25d2bd37755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:39:25 -0400 Subject: [PATCH 05/41] feat(bottlecap): add invocation span (#394) * decouple `hyper` from `trace_processor` * add `handle_traces` * fix tests * removed unused import * move `invocation_context` to `invocation::context` module also added some more fields and refactored it * add `new` and `get_sender_copy` to `trace_agent` * add `get_canonical_resource_name` to `tags_provider` * add `get_function_name` to `lambda::tags` * add `MS_TO_NS` constant * add `invocation::processor` * update use of `invocation::context` * make `lifecycle::listener` to use `invocation::processor` * use `invocation::processor` in `main.rs` * move `MS_TO_NS` to `invocation::processor` * remove unnecessary constant * add `Box::new` back to `trace_agent` * add some comments * add unit tests for `context.rs` * use `on_invocation_start` * rename `lambda_library_detected` to `tracer_detected` * fmt * remove `current_request_id` I think we dont need it * add comment * fmt --- bottlecap/src/bin/bottlecap/main.rs | 183 ++++++----- bottlecap/src/lifecycle/invocation/context.rs | 298 ++++++++++++++++++ bottlecap/src/lifecycle/invocation/mod.rs | 2 + 
.../src/lifecycle/invocation/processor.rs | 164 ++++++++++ bottlecap/src/lifecycle/invocation_context.rs | 72 ----- bottlecap/src/lifecycle/listener.rs | 81 ++++- bottlecap/src/lifecycle/mod.rs | 2 +- bottlecap/src/logs/lambda/processor.rs | 7 +- bottlecap/src/metrics/enhanced/constants.rs | 2 +- bottlecap/src/tags/lambda/tags.rs | 5 + bottlecap/src/tags/provider.rs | 12 + bottlecap/src/traces/trace_agent.rs | 35 +- 12 files changed, 684 insertions(+), 179 deletions(-) create mode 100644 bottlecap/src/lifecycle/invocation/context.rs create mode 100644 bottlecap/src/lifecycle/invocation/mod.rs create mode 100644 bottlecap/src/lifecycle/invocation/processor.rs delete mode 100644 bottlecap/src/lifecycle/invocation_context.rs diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 7b5992bce..8993e3307 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -15,8 +15,7 @@ use bottlecap::{ event_bus::bus::EventBus, events::Event, lifecycle::{ - flush_control::FlushControl, - invocation_context::{InvocationContext, InvocationContextBuffer}, + flush_control::FlushControl, invocation::processor::Processor as InvocationProcessor, listener::Listener as LifecycleListener, }, logger, @@ -294,6 +293,11 @@ async fn extension_loop_active( let trace_flusher = Arc::new(trace_flusher::ServerlessTraceFlusher { buffer: Arc::new(TokioMutex::new(Vec::new())), }); + + let invocation_processor = Arc::new(TokioMutex::new(InvocationProcessor::new( + Arc::clone(&tags_provider), + Arc::clone(config), + ))); let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { obfuscation_config: Arc::new( obfuscation_config::ObfuscationConfig::new() @@ -312,23 +316,28 @@ async fn extension_loop_active( let trace_flusher_clone = trace_flusher.clone(); let stats_flusher_clone = stats_flusher.clone(); - let trace_agent = Box::new(trace_agent::TraceAgent { - config: Arc::clone(config), - trace_processor, - 
trace_flusher: trace_flusher_clone, - stats_processor, - stats_flusher: stats_flusher_clone, - tags_provider: Arc::clone(&tags_provider), - }); + let trace_agent = Box::new( + trace_agent::TraceAgent::new( + Arc::clone(config), + trace_processor.clone(), + trace_flusher_clone, + stats_processor, + stats_flusher_clone, + Arc::clone(&tags_provider), + ) + .await, + ); + let trace_agent_tx = trace_agent.get_sender_copy(); + tokio::spawn(async move { - let res = trace_agent.start_trace_agent().await; + let res = trace_agent.start().await; if let Err(e) = res { error!("Error starting trace agent: {e:?}"); } }); let lifecycle_listener = LifecycleListener { - tags_provider: Arc::clone(&tags_provider), + invocation_processor: Arc::clone(&invocation_processor), }; // TODO(astuyve): deprioritize this task after the first request tokio::spawn(async move { @@ -346,7 +355,6 @@ async fn extension_loop_active( setup_telemetry_client(&r.extension_id, logs_agent_channel).await?; let flush_control = FlushControl::new(config.serverless_flush_strategy); - let mut invocation_context_buffer = InvocationContextBuffer::default(); let mut shutdown = false; let mut flush_interval = flush_control.get_flush_interval(); @@ -389,92 +397,95 @@ async fn extension_loop_active( Event::Metric(event) => { debug!("Metric event: {:?}", event); } - Event::Telemetry(event) => match event.record { - TelemetryRecord::PlatformStart { request_id, .. } => { - invocation_context_buffer.insert(InvocationContext { - request_id, - runtime_duration_ms: 0.0, - }); - } - TelemetryRecord::PlatformInitReport { - initialization_type, - phase, - metrics, - } => { - debug!("Platform init report for initialization_type: {:?} with phase: {:?} and metrics: {:?}", initialization_type, phase, metrics); - lambda_enhanced_metrics - .set_init_duration_metric(metrics.duration_ms); - } - TelemetryRecord::PlatformRuntimeDone { - request_id, - status, - metrics, - .. 
- } => { - if let Some(metrics) = metrics { - invocation_context_buffer - .add_runtime_duration(&request_id, metrics.duration_ms); + Event::Telemetry(event) => + match event.record { + TelemetryRecord::PlatformStart { request_id, .. } => { + let mut p = invocation_processor.lock().await; + p.on_platform_start(request_id, event.time); + drop(p); + } + TelemetryRecord::PlatformInitReport { + initialization_type, + phase, + metrics, + } => { + debug!("Platform init report for initialization_type: {:?} with phase: {:?} and metrics: {:?}", initialization_type, phase, metrics); lambda_enhanced_metrics - .set_runtime_duration_metric(metrics.duration_ms); + .set_init_duration_metric(metrics.duration_ms); } + TelemetryRecord::PlatformRuntimeDone { + request_id, + status, + metrics, + .. + } => { + let mut p = invocation_processor.lock().await; + if let Some(metrics) = metrics { + p.on_platform_runtime_done( + &request_id, + metrics.duration_ms, + config.clone(), + tags_provider.clone(), + trace_processor.clone(), + trace_agent_tx.clone() + ).await; + lambda_enhanced_metrics + .set_runtime_duration_metric(metrics.duration_ms); + } + drop(p); - if status != Status::Success { - lambda_enhanced_metrics.increment_errors_metric(); - if status == Status::Timeout { - lambda_enhanced_metrics.increment_timeout_metric(); + if status != Status::Success { + lambda_enhanced_metrics.increment_errors_metric(); + if status == Status::Timeout { + lambda_enhanced_metrics.increment_timeout_metric(); + } } - } - debug!( - "Runtime done for request_id: {:?} with status: {:?}", - request_id, status - ); - // TODO(astuyve) it'll be easy to - // pass the invocation deadline to - // flush tasks here, so they can - // retry if we have more time - if flush_control.should_flush_end() { - tokio::join!( - logs_flusher.flush(), - metrics_flusher.flush(), - trace_flusher.manual_flush(), - stats_flusher.manual_flush() + debug!( + "Runtime done for request_id: {:?} with status: {:?}", + request_id, status ); 
+ + // TODO(astuyve) it'll be easy to + // pass the invocation deadline to + // flush tasks here, so they can + // retry if we have more time + if flush_control.should_flush_end() { + tokio::join!( + logs_flusher.flush(), + metrics_flusher.flush(), + trace_flusher.manual_flush(), + stats_flusher.manual_flush() + ); + } + break; } - break; - } - TelemetryRecord::PlatformReport { - request_id, - status, - metrics, - .. - } => { - debug!( - "Platform report for request_id: {:?} with status: {:?}", - request_id, status - ); - lambda_enhanced_metrics.set_report_log_metrics(&metrics); - if let Some(invocation_context) = - invocation_context_buffer.remove(&request_id) - { - if invocation_context.runtime_duration_ms > 0.0 { - let post_runtime_duration_ms = metrics.duration_ms - - invocation_context.runtime_duration_ms; + TelemetryRecord::PlatformReport { + request_id, + status, + metrics, + .. + } => { + debug!( + "Platform report for request_id: {:?} with status: {:?}", + request_id, status + ); + lambda_enhanced_metrics.set_report_log_metrics(&metrics); + let mut p = invocation_processor.lock().await; + if let Some(post_runtime_duration_ms) = p.on_platform_report(&request_id, metrics.duration_ms) { lambda_enhanced_metrics.set_post_runtime_duration_metric( post_runtime_duration_ms, ); - } else { - debug!("Impossible to compute post runtime duration for request_id: {:?}", request_id); } - } + drop(p); - if shutdown { - break; + if shutdown { + break; + } + } + _ => { + debug!("Unforwarded Telemetry event: {:?}", event); } } - _ => { - debug!("Unforwarded Telemetry event: {:?}", event); - } - }, } } _ = flush_interval.tick() => { diff --git a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs new file mode 100644 index 000000000..a1c74bc57 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -0,0 +1,298 @@ +use std::collections::VecDeque; + +use tracing::debug; + +#[derive(Debug, Clone, PartialEq)] +pub struct 
Context { + pub request_id: String, + pub runtime_duration_ms: f64, + pub init_duration_ms: f64, + pub start_time: i64, +} + +impl Context { + #[must_use] + pub fn new( + request_id: String, + runtime_duration_ms: f64, + init_duration_ms: f64, + start_time: i64, + ) -> Self { + Context { + request_id, + runtime_duration_ms, + init_duration_ms, + start_time, + } + } +} + +#[allow(clippy::module_name_repetitions)] +pub struct ContextBuffer { + buffer: VecDeque, +} + +impl Default for ContextBuffer { + /// Creates a new `ContextBuffer` with a default capacity of 5. + /// + fn default() -> Self { + ContextBuffer { + buffer: VecDeque::::with_capacity(5), + } + } +} + +impl ContextBuffer { + #[allow(dead_code)] + fn with_capacity(capacity: usize) -> Self { + ContextBuffer { + buffer: VecDeque::::with_capacity(capacity), + } + } + + /// Inserts a context into the buffer. If the buffer is full, the oldest `Context` is removed. + /// + fn insert(&mut self, context: Context) { + if self.size() == self.buffer.capacity() { + self.buffer.pop_front(); + self.buffer.push_back(context); + } else { + if self.get(&context.request_id).is_some() { + self.remove(&context.request_id); + } + + self.buffer.push_back(context); + } + } + + /// Removes a context from the buffer. Returns the removed `Context` if found. + /// + pub fn remove(&mut self, request_id: &String) -> Option { + if let Some(i) = self + .buffer + .iter() + .position(|context| context.request_id == *request_id) + { + return self.buffer.remove(i); + } + debug!("Context for request_id: {:?} not found", request_id); + + None + } + + /// Returns a reference to a `Context` from the buffer if found. + /// + #[must_use] + pub fn get(&self, request_id: &String) -> Option<&Context> { + self.buffer + .iter() + .find(|context| context.request_id == *request_id) + } + + /// Adds the init duration to a `Context` in the buffer. If the `Context` is not found, a new + /// `Context` is created and added to the buffer. 
+ /// + pub fn add_init_duration(&mut self, request_id: &String, init_duration_ms: f64) { + if let Some(context) = self + .buffer + .iter_mut() + .find(|context| context.request_id == *request_id) + { + context.init_duration_ms = init_duration_ms; + } else { + self.insert(Context::new(request_id.clone(), 0.0, init_duration_ms, 0)); + } + } + + /// Adds the start time to a `Context` in the buffer. If the `Context` is not found, a new + /// `Context` is created and added to the buffer. + /// + pub fn add_start_time(&mut self, request_id: &String, start_time: i64) { + if let Some(context) = self + .buffer + .iter_mut() + .find(|context| context.request_id == *request_id) + { + context.start_time = start_time; + } else { + self.insert(Context::new(request_id.clone(), 0.0, 0.0, start_time)); + } + } + + /// Adds the runtime duration to a `Context` in the buffer. If the `Context` is not found, a new + /// `Context` is created and added to the buffer. + /// + pub fn add_runtime_duration(&mut self, request_id: &String, runtime_duration_ms: f64) { + if let Some(context) = self + .buffer + .iter_mut() + .find(|context| context.request_id == *request_id) + { + context.runtime_duration_ms = runtime_duration_ms; + } else { + self.insert(Context::new( + request_id.clone(), + runtime_duration_ms, + 0.0, + 0, + )); + } + } + + /// Returns the size of the buffer. 
+ /// + #[must_use] + pub fn size(&self) -> usize { + self.buffer.len() + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + + #[test] + fn test_insert() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + let request_id_2 = String::from("2"); + let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 2); + assert_eq!(buffer.get(&request_id_2).unwrap(), &context); + + // This should replace the first context + let request_id_3 = String::from("3"); + let context = Context::new(request_id_3.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 2); + assert_eq!(buffer.get(&request_id_3).unwrap(), &context); + + // First context should be None + assert!(buffer.get(&request_id).is_none()); + } + + #[test] + fn test_remove() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + let request_id_2 = String::from("2"); + let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 2); + assert_eq!(buffer.get(&request_id_2).unwrap(), &context); + + // Remove the first context + assert_eq!(buffer.remove(&request_id).unwrap().request_id, request_id); + // Size is reduced by 1 + assert_eq!(buffer.size(), 1); + assert!(buffer.get(&request_id).is_none()); + + // Remove a context that doesn't exist + let unexistent_request_id = String::from("unexistent"); + assert!(buffer.remove(&unexistent_request_id).is_none()); + } + + #[test] + fn 
test_get() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + let request_id_2 = String::from("2"); + let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 2); + assert_eq!(buffer.get(&request_id_2).unwrap(), &context); + + // Get a context that doesn't exist + let unexistent_request_id = String::from("unexistent"); + assert!(buffer.get(&unexistent_request_id).is_none()); + } + + #[test] + fn test_add_init_duration() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + buffer.add_init_duration(&request_id, 100.0); + assert_eq!(buffer.get(&request_id).unwrap().init_duration_ms, 100.0); + + // Add init duration to a context that doesn't exist + let unexistent_request_id = String::from("unexistent"); + buffer.add_init_duration(&unexistent_request_id, 200.0); + assert_eq!(buffer.size(), 2); + assert_eq!( + buffer.get(&unexistent_request_id).unwrap().init_duration_ms, + 200.0 + ); + } + + #[test] + fn test_add_start_time() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + buffer.add_start_time(&request_id, 100); + assert_eq!(buffer.get(&request_id).unwrap().start_time, 100); + + // Add start time to a context that doesn't exist + let unexistent_request_id = String::from("unexistent"); + 
buffer.add_start_time(&unexistent_request_id, 200); + assert_eq!(buffer.size(), 2); + assert_eq!(buffer.get(&unexistent_request_id).unwrap().start_time, 200); + } + + #[test] + fn test_add_runtime_duration() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + buffer.add_runtime_duration(&request_id, 100.0); + assert_eq!(buffer.get(&request_id).unwrap().runtime_duration_ms, 100.0); + + // Add runtime duration to a context that doesn't exist + let unexistent_request_id = String::from("unexistent"); + buffer.add_runtime_duration(&unexistent_request_id, 200.0); + assert_eq!(buffer.size(), 2); + assert_eq!( + buffer + .get(&unexistent_request_id) + .unwrap() + .runtime_duration_ms, + 200.0 + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/mod.rs b/bottlecap/src/lifecycle/invocation/mod.rs new file mode 100644 index 000000000..bf32ed105 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/mod.rs @@ -0,0 +1,2 @@ +pub mod context; +pub mod processor; diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs new file mode 100644 index 000000000..e553150d1 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -0,0 +1,164 @@ +use std::{ + collections::HashMap, + sync::Arc, + time::{SystemTime, UNIX_EPOCH}, +}; + +use chrono::{DateTime, Utc}; +use datadog_trace_protobuf::pb::Span; +use datadog_trace_utils::{send_data::SendData, tracer_header_tags}; +use tokio::sync::mpsc::Sender; +use tracing::debug; + +use crate::{ + config, lifecycle::invocation::context::ContextBuffer, tags::provider, traces::trace_processor, +}; + +pub const MS_TO_NS: f64 = 1_000_000.0; + +pub struct Processor { + pub context_buffer: ContextBuffer, + pub span: Span, + tracer_detected: bool, +} + 
+impl Processor { + #[must_use] + pub fn new(tags_provider: Arc, config: Arc) -> Self { + let service = config.service.clone().unwrap_or("aws.lambda".to_string()); + let resource = tags_provider + .get_canonical_resource_name() + .unwrap_or("aws_lambda".to_string()); + + Processor { + context_buffer: ContextBuffer::default(), + span: Span { + service, + name: "aws.lambda".to_string(), + resource, + trace_id: 0, // set later + span_id: 0, // maybe set later? + parent_id: 0, // set later + start: 0, // set later + duration: 0, // set later + error: 0, + meta: HashMap::new(), + metrics: HashMap::new(), + r#type: "serverless".to_string(), + meta_struct: HashMap::new(), + span_links: Vec::new(), + }, + tracer_detected: false, + } + } + + /// Given a `request_id` and the time of the platform start, add the start time to the context buffer. + /// + /// Also, set the start time of the current span. + /// + pub fn on_platform_start(&mut self, request_id: String, time: DateTime) { + let start_time: i64 = SystemTime::from(time) + .duration_since(UNIX_EPOCH) + .expect("time went backwards") + .as_nanos() + .try_into() + .unwrap_or_default(); + self.context_buffer.add_start_time(&request_id, start_time); + self.span.start = start_time; + } + + #[allow(clippy::cast_possible_truncation)] + pub async fn on_platform_runtime_done( + &mut self, + request_id: &String, + duration_ms: f64, + config: Arc, + tags_provider: Arc, + trace_processor: Arc, + trace_agent_tx: Sender, + ) { + self.context_buffer + .add_runtime_duration(request_id, duration_ms); + + if let Some(context) = self.context_buffer.get(request_id) { + let span = &mut self.span; + // `round` is intentionally meant to be a whole integer + span.duration = (context.runtime_duration_ms * MS_TO_NS).round() as i64; + span.meta + .insert("request_id".to_string(), request_id.clone()); + + // todo(duncanista): add missing tags + // - cold start, proactive init + // - language + // - function.request - capture lambda payload + // - 
function.response + // - error.msg + // - error.type + // - error.stack + // - trigger tags (from inferred spans) + // - metrics tags (for asm) + } + + if self.tracer_detected { + let span_size = std::mem::size_of_val(&self.span); + + // todo: figure out what to do here + let header_tags = tracer_header_tags::TracerHeaderTags { + lang: "", + lang_version: "", + lang_interpreter: "", + lang_vendor: "", + tracer_version: "", + container_id: "", + client_computed_top_level: false, + client_computed_stats: false, + }; + + let send_data = trace_processor.process_traces( + config.clone(), + tags_provider.clone(), + header_tags, + vec![vec![self.span.clone()]], + span_size, + ); + + if let Err(e) = trace_agent_tx.send(send_data).await { + debug!("Failed to send invocation span to agent: {e}"); + } + } + } + + /// Given a `request_id` and the duration in milliseconds of the platform report, + /// calculate the duration of the runtime if the `request_id` is found in the context buffer. + /// + /// If the `request_id` is not found in the context buffer, return `None`. + /// If the `runtime_duration_ms` hasn't been seen, return `None`. + /// + pub fn on_platform_report(&mut self, request_id: &String, duration_ms: f64) -> Option { + if let Some(context) = self.context_buffer.remove(request_id) { + if context.runtime_duration_ms == 0.0 { + return None; + } + + return Some(duration_ms - context.runtime_duration_ms); + } + + None + } + + /// If this method is called, it means that we are operating in a Universally Instrumented + /// runtime. Therefore, we need to set the `tracer_detected` flag to `true`. + /// + pub fn on_invocation_start(&mut self) { + self.tracer_detected = true; + } + + /// Given trace context information, set it to the current span. 
+ /// + pub fn on_invocation_end(&mut self, trace_id: u64, span_id: u64, parent_id: u64) { + let span = &mut self.span; + span.trace_id = trace_id; + span.span_id = span_id; + span.parent_id = parent_id; + } +} diff --git a/bottlecap/src/lifecycle/invocation_context.rs b/bottlecap/src/lifecycle/invocation_context.rs deleted file mode 100644 index 24e8e4541..000000000 --- a/bottlecap/src/lifecycle/invocation_context.rs +++ /dev/null @@ -1,72 +0,0 @@ -use std::collections::VecDeque; - -use tracing::debug; - -#[derive(Debug, Clone)] -pub struct InvocationContext { - pub request_id: String, - pub runtime_duration_ms: f64, -} - -#[allow(clippy::module_name_repetitions)] -pub struct InvocationContextBuffer { - buffer: VecDeque, -} - -impl Default for InvocationContextBuffer { - fn default() -> Self { - InvocationContextBuffer { - buffer: VecDeque::::with_capacity(5), - } - } -} - -impl InvocationContextBuffer { - pub fn insert(&mut self, invocation_context: InvocationContext) { - if self.buffer.len() == self.buffer.capacity() { - self.buffer.pop_front(); - self.buffer.push_back(invocation_context); - } else { - if self.get(&invocation_context.request_id).is_some() { - self.remove(&invocation_context.request_id); - } - - self.buffer.push_back(invocation_context); - } - } - - pub fn remove(&mut self, request_id: &String) -> Option { - if let Some(i) = self - .buffer - .iter() - .position(|context| context.request_id == *request_id) - { - return self.buffer.remove(i); - } - debug!("Context for request_id: {:?} not found", request_id); - - None - } - - #[must_use] - pub fn get(&self, request_id: &String) -> Option<&InvocationContext> { - self.buffer - .iter() - .find(|context| context.request_id == *request_id) - } - - pub fn add_runtime_duration(&mut self, request_id: &String, runtime_duration_ms: f64) { - if let Some(context) = self - .buffer - .iter_mut() - .find(|context| context.request_id == *request_id) - { - context.runtime_duration_ms = runtime_duration_ms; - } else 
{ - self.insert(InvocationContext { - request_id: request_id.to_string(), - runtime_duration_ms, - }); - } - } -} diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs index 0afdab41c..ebb3dd833 100644 --- a/bottlecap/src/lifecycle/listener.rs +++ b/bottlecap/src/lifecycle/listener.rs @@ -8,9 +8,10 @@ use std::sync::Arc; use hyper::service::{make_service_fn, service_fn}; use hyper::{http, Body, Method, Request, Response, StatusCode}; use serde_json::json; -use tracing::{error, warn}; +use tokio::sync::Mutex; +use tracing::{debug, error, warn}; -use crate::tags::provider; +use crate::lifecycle::invocation::processor::Processor as InvocationProcessor; const HELLO_PATH: &str = "/lambda/hello"; const START_INVOCATION_PATH: &str = "/lambda/start-invocation"; @@ -18,13 +19,17 @@ const END_INVOCATION_PATH: &str = "/lambda/end-invocation"; const AGENT_PORT: usize = 8124; pub struct Listener { - pub tags_provider: Arc, + pub invocation_processor: Arc>, } impl Listener { pub async fn start(&self) -> Result<(), Box> { + let invocation_processor = self.invocation_processor.clone(); + let make_svc = make_service_fn(move |_| { - let service = service_fn(Self::handler); + let invocation_processor = invocation_processor.clone(); + + let service = service_fn(move |req| Self::handler(req, invocation_processor.clone())); async move { Ok::<_, Infallible>(service) } }); @@ -44,11 +49,26 @@ impl Listener { Ok(()) } - #[allow(clippy::unused_async)] - async fn handler(req: Request) -> http::Result> { + async fn handler( + req: Request, + invocation_processor: Arc>, + ) -> http::Result> { match (req.method(), req.uri().path()) { - (&Method::POST, START_INVOCATION_PATH) => Self::start_invocation_handler(req), - (&Method::POST, END_INVOCATION_PATH) => Self::end_invocation_handler(req), + (&Method::POST, START_INVOCATION_PATH) => { + Self::start_invocation_handler(req, invocation_processor).await + } + (&Method::POST, END_INVOCATION_PATH) => { + match 
Self::end_invocation_handler(req, invocation_processor).await { + Ok(response) => Ok(response), + Err(e) => { + error!("Failed to end invocation {e}"); + Ok(Response::builder() + .status(500) + .body(Body::empty()) + .expect("no body")) + } + } + } (&Method::GET, HELLO_PATH) => Self::hello_handler(), _ => { let mut not_found = Response::default(); @@ -58,13 +78,54 @@ impl Listener { } } - fn start_invocation_handler(_: Request) -> http::Result> { + async fn start_invocation_handler( + _: Request, + invocation_processor: Arc>, + ) -> http::Result> { + debug!("Received start invocation request"); + let mut processor = invocation_processor.lock().await; + processor.on_invocation_start(); + drop(processor); + Response::builder() .status(200) .body(Body::from(json!({}).to_string())) } - fn end_invocation_handler(_: Request) -> http::Result> { + async fn end_invocation_handler( + req: Request, + invocation_processor: Arc>, + ) -> http::Result> { + debug!("Received end invocation request"); + let (parts, _) = req.into_parts(); + let headers = parts.headers; + + let mut processor = invocation_processor.lock().await; + + let mut trace_id = 0; + if let Some(header) = headers.get("x-datadog-trace-id") { + if let Ok(header_value) = header.to_str() { + trace_id = header_value.parse::().unwrap_or(0); + } + } + + let mut span_id = 0; + if let Some(header) = headers.get("x-datadog-span-id") { + if let Ok(header_value) = header.to_str() { + span_id = header_value.parse::().unwrap_or(0); + } + } + + let mut parent_id = 0; + if let Some(header) = headers.get("x-datadog-parent-id") { + if let Ok(header_value) = header.to_str() { + parent_id = header_value.parse::().unwrap_or(0); + } + } + + processor.on_invocation_end(trace_id, span_id, parent_id); + drop(processor); + Response::builder() .status(200) .body(Body::from(json!({}).to_string())) diff --git a/bottlecap/src/lifecycle/mod.rs b/bottlecap/src/lifecycle/mod.rs index a35e20622..a0b3eda68 100644 --- 
a/bottlecap/src/lifecycle/mod.rs +++ b/bottlecap/src/lifecycle/mod.rs @@ -1,3 +1,3 @@ pub mod flush_control; -pub mod invocation_context; +pub mod invocation; pub mod listener; diff --git a/bottlecap/src/logs/lambda/processor.rs b/bottlecap/src/logs/lambda/processor.rs index 955872d8f..d3b519682 100644 --- a/bottlecap/src/logs/lambda/processor.rs +++ b/bottlecap/src/logs/lambda/processor.rs @@ -6,7 +6,7 @@ use tracing::error; use crate::config; use crate::events::Event; -use crate::lifecycle::invocation_context::InvocationContext; +use crate::lifecycle::invocation::context::Context as InvocationContext; use crate::logs::aggregator::Aggregator; use crate::logs::processor::{Processor, Rule}; use crate::tags::provider; @@ -53,10 +53,7 @@ impl LambdaProcessor { service, tags, rules, - invocation_context: InvocationContext { - request_id: String::new(), - runtime_duration_ms: 0.0, - }, + invocation_context: InvocationContext::new(String::new(), 0.0, 0.0, 0), orphan_logs: Vec::new(), ready_logs: Vec::new(), event_bus, diff --git a/bottlecap/src/metrics/enhanced/constants.rs b/bottlecap/src/metrics/enhanced/constants.rs index 5011b7d64..3c2d34e0a 100644 --- a/bottlecap/src/metrics/enhanced/constants.rs +++ b/bottlecap/src/metrics/enhanced/constants.rs @@ -3,7 +3,7 @@ pub const BASE_LAMBDA_INVOCATION_PRICE: f64 = 0.000_000_2; pub const X86_LAMBDA_PRICE_PER_GB_SECOND: f64 = 0.000_016_666_7; pub const ARM_LAMBDA_PRICE_PER_GB_SECOND: f64 = 0.000_013_333_4; pub const MS_TO_SEC: f64 = 0.001; -pub const MB_TO_GB: f64 = 1024.0; +pub const MB_TO_GB: f64 = 1_024.0; // Enhanced metrics pub const MAX_MEMORY_USED_METRIC: &str = "aws.lambda.enhanced.max_memory_used"; diff --git a/bottlecap/src/tags/lambda/tags.rs b/bottlecap/src/tags/lambda/tags.rs index caf0e3f89..7e6ce54e0 100644 --- a/bottlecap/src/tags/lambda/tags.rs +++ b/bottlecap/src/tags/lambda/tags.rs @@ -239,6 +239,11 @@ impl Lambda { self.tags_map.get(FUNCTION_ARN_KEY) } + #[must_use] + pub fn get_function_name(&self) -> 
Option<&String> { + self.tags_map.get(FUNCTION_NAME_KEY) + } + #[must_use] pub fn get_tags_map(&self) -> &hash_map::HashMap { &self.tags_map diff --git a/bottlecap/src/tags/provider.rs b/bottlecap/src/tags/provider.rs index a3a6881df..b6e775ac2 100644 --- a/bottlecap/src/tags/provider.rs +++ b/bottlecap/src/tags/provider.rs @@ -47,6 +47,11 @@ impl Provider { self.tag_provider.get_canonical_id() } + #[must_use] + pub fn get_canonical_resource_name(&self) -> Option { + self.tag_provider.get_canonical_resource_name() + } + #[must_use] pub fn get_tags_map(&self) -> &hash_map::HashMap { self.tag_provider.get_tags_map() @@ -56,6 +61,7 @@ impl Provider { trait GetTags { fn get_tags_vec(&self) -> Vec; fn get_canonical_id(&self) -> Option; + fn get_canonical_resource_name(&self) -> Option; fn get_tags_map(&self) -> &hash_map::HashMap; } @@ -72,6 +78,12 @@ impl GetTags for TagProvider { } } + fn get_canonical_resource_name(&self) -> Option { + match self { + TagProvider::Lambda(lambda_tags) => lambda_tags.get_function_name().cloned(), + } + } + fn get_tags_map(&self) -> &hash_map::HashMap { match self { TagProvider::Lambda(lambda_tags) => lambda_tags.get_tags_map(), diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 13ad6a321..57e6fd4c6 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -34,6 +34,7 @@ pub struct TraceAgent { pub stats_processor: Arc, pub stats_flusher: Arc, pub tags_provider: Arc, + tx: Sender, } #[derive(Clone, Copy)] @@ -43,9 +44,15 @@ pub enum ApiVersion { } impl TraceAgent { - pub async fn start_trace_agent(&self) -> Result<(), Box> { - let now = Instant::now(); - + #[must_use] + pub async fn new( + config: Arc, + trace_processor: Arc, + trace_flusher: Arc, + stats_processor: Arc, + stats_flusher: Arc, + tags_provider: Arc, + ) -> TraceAgent { // setup a channel to send processed traces to our flusher. 
tx is passed through each // endpoint_handler to the trace processor, which uses it to send de-serialized // processed trace payloads to our trace flusher. @@ -54,9 +61,24 @@ impl TraceAgent { // start our trace flusher. receives trace payloads and handles buffering + deciding when to // flush to backend. - let trace_flusher = self.trace_flusher.clone(); + let trace_flusher = trace_flusher.clone(); trace_flusher.start_trace_flusher(trace_rx).await; + TraceAgent { + config, + trace_processor, + trace_flusher, + stats_processor, + stats_flusher, + tags_provider, + tx: trace_tx, + } + } + + pub async fn start(&self) -> Result<(), Box> { + let now = Instant::now(); + let trace_tx = self.tx.clone(); + // channels to send processed stats to our stats flusher. let (stats_tx, stats_rx): ( Sender, @@ -267,4 +289,9 @@ impl TraceAgent { .status(200) .body(Body::from(response_json.to_string())) } + + #[must_use] + pub fn get_sender_copy(&self) -> Sender { + self.tx.clone() + } } From 14611cd6362e94fc35816a3567e0754adb250834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Wed, 9 Oct 2024 14:59:02 -0400 Subject: [PATCH 06/41] feature(bottlecap): add trace context extractor (#401) * add `thiserror` and `lazystatic` * add Span/Trace `context` * update `mod.rs` * add `propagation` module * add `propagation::Error` * add interface for `carrier` and `HashMap` implementation * add `text_map_propagator` added `Datadog` and `Tracecontext` implementations * update `LICENSE-3rdparty.yml` --- bottlecap/Cargo.lock | 2 + bottlecap/Cargo.toml | 2 + bottlecap/src/traces/context.rs | 17 + bottlecap/src/traces/mod.rs | 2 + bottlecap/src/traces/propagation/carrier.rs | 68 +++ bottlecap/src/traces/propagation/error.rs | 33 ++ bottlecap/src/traces/propagation/mod.rs | 3 + .../traces/propagation/text_map_propagator.rs | 508 ++++++++++++++++++ 8 files changed, 635 insertions(+) create mode 100644 bottlecap/src/traces/context.rs 
create mode 100644 bottlecap/src/traces/propagation/carrier.rs create mode 100644 bottlecap/src/traces/propagation/error.rs create mode 100644 bottlecap/src/traces/propagation/mod.rs create mode 100644 bottlecap/src/traces/propagation/text_map_propagator.rs diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index 3c206bc53..12c93cb2d 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -425,6 +425,7 @@ dependencies = [ "hmac", "httpmock", "hyper 0.14.30", + "lazy_static", "log", "proptest", "protobuf", @@ -436,6 +437,7 @@ dependencies = [ "serde_json", "serial_test", "sha2", + "thiserror", "tokio", "tokio-util", "tracing", diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index 5e539298a..a8ade97f4 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -18,12 +18,14 @@ datadog-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev dogstatsd = { git = "https://github.com/DataDog/libdatadog", rev = "92272e90a7919f07178f3246ef8f82295513cfed" } figment = { version = "0.10", default-features = false, features = ["yaml", "env"] } hyper = { version = "0.14", default-features = false, features = ["server"] } +lazy_static = { version = "1.5", default-features = false } log = { version = "0.4", default-features = false } protobuf = { version = "3.5", default-features = false } regex = { version = "1.10", default-features = false } reqwest = { version = "0.12", features = ["json", "http2", "rustls-tls"], default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["alloc"] } +thiserror = { version = "1.0", default-features = false} tokio = { version = "1.37", default-features = false, features = ["macros", "rt-multi-thread"] } tokio-util = { version = "0.7", default-features = false } tracing = { version = "0.1", default-features = false } diff --git a/bottlecap/src/traces/context.rs 
b/bottlecap/src/traces/context.rs new file mode 100644 index 000000000..35e9921ee --- /dev/null +++ b/bottlecap/src/traces/context.rs @@ -0,0 +1,17 @@ +use std::collections::HashMap; + +#[derive(Copy, Clone, Default, Debug)] +pub struct Sampling { + pub priority: Option, + pub mechanism: Option, +} + +#[derive(Clone, Default, Debug)] +#[allow(clippy::module_name_repetitions)] +pub struct SpanContext { + pub trace_id: u64, + pub span_id: u64, + pub sampling: Option, + pub origin: Option, + pub tags: HashMap, +} diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index 8545fbe40..d8facd07c 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -1,6 +1,8 @@ // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +pub mod context; +pub mod propagation; pub mod stats_flusher; pub mod stats_processor; pub mod trace_agent; diff --git a/bottlecap/src/traces/propagation/carrier.rs b/bottlecap/src/traces/propagation/carrier.rs new file mode 100644 index 000000000..fc5ef9dbb --- /dev/null +++ b/bottlecap/src/traces/propagation/carrier.rs @@ -0,0 +1,68 @@ +/// Code inspired, and copied, by OpenTelemetry Rust project. +/// +/// +use std::collections::HashMap; + +/// Injector provides an interface for a carrier to be used +/// with a Propagator to inject a Context into the carrier. +/// +pub trait Injector { + /// Set a value in the carrier. + fn set(&mut self, key: &str, value: String); +} + +pub trait Extractor { + /// Get a value from the carrier. + fn get(&self, key: &str) -> Option<&str>; + + /// Get all keys from the carrier. + fn keys(&self) -> Vec<&str>; +} + +impl Injector for HashMap { + /// Set a key and value in the `HashMap`. + fn set(&mut self, key: &str, value: String) { + self.insert(key.to_lowercase(), value); + } +} + +impl Extractor for HashMap { + /// Get a value for a key from the `HashMap`. 
+ fn get(&self, key: &str) -> Option<&str> { + self.get(&key.to_lowercase()).map(String::as_str) + } + + /// Collect all the keys from the `HashMap`. + fn keys(&self) -> Vec<&str> { + self.keys().map(String::as_str).collect::>() + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn hash_map_get() { + let mut carrier = HashMap::new(); + carrier.set("headerName", "value".to_string()); + + assert_eq!( + Extractor::get(&carrier, "HEADERNAME"), + Some("value"), + "case insensitive extraction" + ); + } + + #[test] + fn hash_map_keys() { + let mut carrier = HashMap::new(); + carrier.set("headerName1", "value1".to_string()); + carrier.set("headerName2", "value2".to_string()); + + let got = Extractor::keys(&carrier); + assert_eq!(got.len(), 2); + assert!(got.contains(&"headername1")); + assert!(got.contains(&"headername2")); + } +} diff --git a/bottlecap/src/traces/propagation/error.rs b/bottlecap/src/traces/propagation/error.rs new file mode 100644 index 000000000..af7a37d9b --- /dev/null +++ b/bottlecap/src/traces/propagation/error.rs @@ -0,0 +1,33 @@ +use thiserror::Error; + +#[derive(Error, Debug, Copy, Clone)] +#[error("Cannot {} from {}, {}", operation, message, propagator_name)] +pub struct Error { + message: &'static str, + // which propagator this error comes from + propagator_name: &'static str, + // what operation was attempted + operation: &'static str, +} + +impl Error { + /// Error when extracting a value from a carrier + #[must_use] + pub fn extract(message: &'static str, propagator_name: &'static str) -> Self { + Self { + message, + propagator_name, + operation: "extract", + } + } + + /// Error when injecting a value into a carrier + #[allow(clippy::must_use_candidate)] + pub fn inject(message: &'static str, propagator_name: &'static str) -> Self { + Self { + message, + propagator_name, + operation: "inject", + } + } +} diff --git a/bottlecap/src/traces/propagation/mod.rs b/bottlecap/src/traces/propagation/mod.rs new file mode 100644 index 
000000000..ef268ef61 --- /dev/null +++ b/bottlecap/src/traces/propagation/mod.rs @@ -0,0 +1,3 @@ +pub mod carrier; +pub mod error; +pub mod text_map_propagator; diff --git a/bottlecap/src/traces/propagation/text_map_propagator.rs b/bottlecap/src/traces/propagation/text_map_propagator.rs new file mode 100644 index 000000000..ce3f5abd1 --- /dev/null +++ b/bottlecap/src/traces/propagation/text_map_propagator.rs @@ -0,0 +1,508 @@ +use std::collections::HashMap; + +use lazy_static::lazy_static; +use log::warn; +use regex::Regex; +use tracing::{debug, error}; + +use super::{ + carrier::{Extractor, Injector}, + error::Error, +}; + +use crate::traces::context::{Sampling, SpanContext}; + +// Datadog Keys +const DATADOG_TRACE_ID_KEY: &str = "x-datadog-trace-id"; +const DATADOG_PARENT_ID_KEY: &str = "x-datadog-parent-id"; +const DATADOG_SAMPLING_PRIORITY_KEY: &str = "x-datadog-sampling-priority"; +const DATADOG_ORIGIN_KEY: &str = "x-datadog-origin"; +const DATADOG_TAGS_KEY: &str = "x-datadog-tags"; + +const DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY: &str = "_dd.p.tid"; +const DATADOG_PROPAGATION_ERROR_KEY: &str = "_dd.propagation_error"; +const DATADOG_LAST_PARENT_ID_KEY: &str = "_dd.parent_id"; +const DATADOG_SAMPLING_DECISION_KEY: &str = "_dd.p.dm"; + +// Traceparent Keys +const TRACEPARENT_KEY: &str = "traceparent"; +const TRACESTATE_KEY: &str = "tracestate"; + +lazy_static! 
{ + static ref TRACEPARENT_REGEX: Regex = + Regex::new(r"(?i)^([a-f0-9]{2})-([a-f0-9]{32})-([a-f0-9]{16})-([a-f0-9]{2})(-.*)?$") + .expect("failed creating regex"); + static ref INVALID_SEGMENT_REGEX: Regex = Regex::new(r"^0+$").expect("failed creating regex"); + static ref VALID_TAG_KEY_REGEX: Regex = + Regex::new(r"^_dd\.p\.[\x21-\x2b\x2d-\x7e]+$").expect("failed creating regex"); + static ref VALID_TAG_VALUE_REGEX: Regex = + Regex::new(r"^[\x20-\x2b\x2d-\x7e]*$").expect("failed creating regex"); + static ref INVALID_ASCII_CHARACTERS_REGEX: Regex = + Regex::new(r"[^\x20-\x7E]+").expect("failed creating regex"); + static ref VALID_SAMPLING_DECISION_REGEX: Regex = + Regex::new(r"^-([0-9])$").expect("failed creating regex"); +} + +pub trait TextMapPropagator { + fn extract(&self, carrier: &dyn Extractor) -> SpanContext; + fn inject(&self, context: SpanContext, carrier: &mut dyn Injector); +} + +#[derive(Clone, Copy)] +pub struct DatadogPropagator; + +impl TextMapPropagator for DatadogPropagator { + fn extract(&self, carrier: &dyn Extractor) -> SpanContext { + Self::extract_context(carrier).unwrap_or_default() + } + + fn inject(&self, _context: SpanContext, _carrier: &mut dyn Injector) { + todo!(); + } +} + +impl DatadogPropagator { + fn extract_context(carrier: &dyn Extractor) -> Option { + if let Some(trace_id) = Self::extract_trace_id(carrier) { + let parent_id = Self::extract_parent_id(carrier).unwrap_or(0); + let origin = Self::extract_origin(carrier); + let mut tags = Self::extract_tags(carrier); + let sampling_priority = Self::extract_sampling_priority(carrier).unwrap_or(2); + + Self::validate_sampling_decision(&mut tags); + + return Some(SpanContext { + trace_id, + span_id: parent_id, + sampling: Some(Sampling { + priority: Some(sampling_priority), + mechanism: None, + }), + origin, + tags, + }); + } + + None + } + + fn validate_sampling_decision(tags: &mut HashMap) { + let should_remove = + tags.get(DATADOG_SAMPLING_DECISION_KEY) + .map_or(false, 
|sampling_decision| { + let is_invalid = !VALID_SAMPLING_DECISION_REGEX.is_match(sampling_decision); + if is_invalid { + warn!("Failed to decode `_dd.p.dm`: {}", sampling_decision); + } + is_invalid + }); + + if should_remove { + tags.remove(DATADOG_SAMPLING_DECISION_KEY); + tags.insert( + DATADOG_PROPAGATION_ERROR_KEY.to_string(), + "decoding_error".to_string(), + ); + } + + // todo: appsec standalone + } + + fn extract_trace_id(carrier: &dyn Extractor) -> Option { + let trace_id = carrier.get(DATADOG_TRACE_ID_KEY)?; + + if INVALID_SEGMENT_REGEX.is_match(trace_id) { + return None; + } + + trace_id.parse::().ok() + } + + fn extract_parent_id(carrier: &dyn Extractor) -> Option { + let parent_id = carrier.get(DATADOG_PARENT_ID_KEY)?; + + parent_id.parse::().ok() + } + + fn extract_sampling_priority(carrier: &dyn Extractor) -> Option { + let sampling_priority = carrier.get(DATADOG_SAMPLING_PRIORITY_KEY)?; + + sampling_priority.parse::().ok() + } + + fn extract_origin(carrier: &dyn Extractor) -> Option { + let origin = carrier.get(DATADOG_ORIGIN_KEY)?; + Some(origin.to_string()) + } + + fn extract_tags(carrier: &dyn Extractor) -> HashMap { + let carrier_tags = carrier.get(DATADOG_TAGS_KEY).unwrap_or_default(); + let mut tags: HashMap = HashMap::new(); + + // todo: + // - trace propagation disabled + // - trace propagation max lenght + + let pairs = carrier_tags.split(','); + for pair in pairs { + if let Some((k, v)) = pair.split_once('=') { + // todo: reject key on tags extract reject + if k.starts_with("_dd.p") { + tags.insert(k.to_string(), v.to_string()); + } + } + } + + // Handle 128bit trace ID + if !tags.is_empty() { + if let Some(trace_id_higher_order_bits) = + carrier.get(DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY) + { + if !Self::higher_order_bits_valid(trace_id_higher_order_bits) { + warn!("Malformed Trace ID: {trace_id_higher_order_bits} Failed to decode trace ID from carrier."); + tags.insert( + DATADOG_PROPAGATION_ERROR_KEY.to_string(), + format!("malformed 
tid {trace_id_higher_order_bits}"), + ); + tags.remove(DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY); + } + } + } + + if !tags.contains_key(DATADOG_SAMPLING_DECISION_KEY) { + tags.insert(DATADOG_SAMPLING_DECISION_KEY.to_string(), "-3".to_string()); + } + + tags + } + + fn higher_order_bits_valid(trace_id_higher_order_bits: &str) -> bool { + if trace_id_higher_order_bits.len() != 16 { + return false; + } + + match u64::from_str_radix(trace_id_higher_order_bits, 16) { + Ok(_) => {} + Err(_) => return false, + } + + true + } +} + +struct Traceparent { + sampling_priority: i8, + trace_id: u128, + span_id: u64, +} + +struct Tracestate { + sampling_priority: Option, + origin: Option, + lower_order_trace_id: Option, +} + +#[derive(Clone, Copy)] +pub struct TraceparentPropagator; + +impl TextMapPropagator for TraceparentPropagator { + fn extract(&self, carrier: &dyn Extractor) -> SpanContext { + Self::extract_context(carrier).unwrap_or_default() + } + + fn inject(&self, _context: SpanContext, _carrier: &mut dyn Injector) { + todo!() + } +} + +impl TraceparentPropagator { + fn extract_context(carrier: &dyn Extractor) -> Option { + let tp = carrier.get(TRACEPARENT_KEY)?.trim(); + + match Self::extract_traceparent(tp) { + Ok(traceparent) => { + let mut tags = HashMap::new(); + tags.insert(TRACEPARENT_KEY.to_string(), tp.to_string()); + + let mut origin = None; + let mut sampling_priority = traceparent.sampling_priority; + if let Some(ts) = carrier.get(TRACESTATE_KEY) { + if let Some(tracestate) = Self::extract_tracestate(ts, &mut tags) { + if let Some(lpid) = tracestate.lower_order_trace_id { + tags.insert(DATADOG_LAST_PARENT_ID_KEY.to_string(), lpid); + } + + origin = tracestate.origin; + + sampling_priority = Self::define_sampling_priority( + traceparent.sampling_priority, + tracestate.sampling_priority, + ); + } + } else { + debug!("No `dd` value found in tracestate"); + } + + let (trace_id_higher_order_bits, trace_id_lower_order_bits) = + 
Self::split_trace_id(traceparent.trace_id); + tags.insert( + DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY.to_string(), + trace_id_higher_order_bits.to_string(), + ); + + Some(SpanContext { + trace_id: trace_id_lower_order_bits, + span_id: traceparent.span_id, + sampling: Some(Sampling { + priority: Some(sampling_priority), + mechanism: None, + }), + origin, + tags, + }) + } + Err(e) => { + error!("Failed to extract traceparent: {e}"); + None + } + } + } + + fn extract_tracestate( + tracestate: &str, + tags: &mut HashMap, + ) -> Option { + let ts_v = tracestate.split(',').map(str::trim); + let ts = ts_v.clone().collect::>().join(","); + + if INVALID_ASCII_CHARACTERS_REGEX.is_match(&ts) { + debug!("Received invalid tracestate header {tracestate}"); + return None; + } + + tags.insert(TRACESTATE_KEY.to_string(), ts.to_string()); + + let mut dd: Option> = None; + for v in ts_v.clone() { + if let Some(stripped) = v.strip_prefix("dd=") { + dd = Some( + stripped + .split(';') + .filter_map(|item| { + let mut parts = item.splitn(2, ':'); + Some((parts.next()?.to_string(), parts.next()?.to_string())) + }) + .collect(), + ); + } + } + + if let Some(dd) = dd { + let mut tracestate = Tracestate { + sampling_priority: None, + origin: None, + lower_order_trace_id: None, + }; + + if let Some(ts_sp) = dd.get("s") { + if let Ok(p_sp) = ts_sp.parse::() { + tracestate.sampling_priority = Some(p_sp); + } + } + + if let Some(o) = dd.get("o") { + tracestate.origin = Some(Self::decode_tag_value(o)); + } + + if let Some(lo_tid) = dd.get("p") { + tracestate.lower_order_trace_id = Some(lo_tid.to_string()); + } + + for (k, v) in &dd { + if k.starts_with("t.") { + let nk = format!("_dd.p.{k}"); + tags.insert(nk, Self::decode_tag_value(v)); + } + } + + return Some(tracestate); + } + + None + } + + fn decode_tag_value(value: &str) -> String { + value.replace('~', "=") + } + + fn define_sampling_priority( + traceparent_sampling_priority: i8, + tracestate_sampling_priority: Option, + ) -> i8 { + if let 
Some(ts_sp) = tracestate_sampling_priority { + if (traceparent_sampling_priority == 1 && ts_sp > 0) + || (traceparent_sampling_priority == 0 && ts_sp < 0) + { + return ts_sp; + } + } + + traceparent_sampling_priority + } + + fn extract_traceparent(traceparent: &str) -> Result { + let captures = TRACEPARENT_REGEX + .captures(traceparent) + .ok_or_else(|| Error::extract("invalid traceparent", "traceparent"))?; + + let version = &captures[1]; + let trace_id = &captures[2]; + let span_id = &captures[3]; + let flags = &captures[4]; + let tail = captures.get(5).map_or("", |m| m.as_str()); + + Self::extract_version(version, tail)?; + + let trace_id = Self::extract_trace_id(trace_id)?; + let span_id = Self::extract_span_id(span_id)?; + + let trace_flags = Self::extract_trace_flags(flags)?; + let sampling_priority = i8::from(trace_flags & 0x1 != 0); + + Ok(Traceparent { + sampling_priority, + trace_id, + span_id, + }) + } + + fn extract_version(version: &str, tail: &str) -> Result<(), Error> { + match version { + "ff" => { + return Err(Error::extract( + "`ff` is an invalid traceparent version", + "traceparent", + )) + } + "00" => { + if !tail.is_empty() { + return Err(Error::extract("Traceparent with version `00` should contain only 4 values delimited by `-`", "traceparent")); + } + } + _ => { + warn!("Unsupported traceparent version {version}, still atempenting to parse"); + } + } + + Ok(()) + } + + fn extract_trace_id(trace_id: &str) -> Result { + if INVALID_SEGMENT_REGEX.is_match(trace_id) { + return Err(Error::extract( + "`0` value for trace_id is invalid", + "traceparent", + )); + } + + u128::from_str_radix(trace_id, 16) + .map_err(|_| Error::extract("Failed to decode trace_id", "traceparent")) + } + + #[allow(clippy::cast_possible_truncation)] + fn split_trace_id(trace_id: u128) -> (u64, u64) { + let trace_id_lower_order_bits = trace_id as u64; + let trace_id_higher_order_bits = (trace_id >> 64) as u64; + + (trace_id_higher_order_bits, trace_id_lower_order_bits) + } + 
+ fn extract_span_id(span_id: &str) -> Result { + if INVALID_SEGMENT_REGEX.is_match(span_id) { + return Err(Error::extract( + "`0` value for span_id is invalid", + "traceparent", + )); + } + + u64::from_str_radix(span_id, 16) + .map_err(|_| Error::extract("Failed to decode span_id", "traceparent")) + } + + fn extract_trace_flags(flags: &str) -> Result { + if flags.len() != 2 { + return Err(Error::extract("Invalid trace flags length", "traceparent")); + } + + u8::from_str_radix(flags, 16) + .map_err(|_| Error::extract("Failed to decode trace_flags", "traceparent")) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_extract_datadog_propagator() { + let mut headers = HashMap::new(); + headers.insert("x-datadog-trace-id".to_string(), "1234".to_string()); + headers.insert("x-datadog-parent-id".to_string(), "5678".to_string()); + headers.insert("x-datadog-sampling-priority".to_string(), "1".to_string()); + headers.insert("x-datadog-origin".to_string(), "synthetics".to_string()); + headers.insert( + "x-datadog-tags".to_string(), + "_dd.p.test=value,_dd.p.tid=4321,any=tag".to_string(), + ); + + let propagator = DatadogPropagator; + + let context = propagator.extract(&headers); + + assert_eq!(context.trace_id, 1234); + assert_eq!(context.span_id, 5678); + assert_eq!(context.sampling.unwrap().priority, Some(1)); + assert_eq!(context.origin, Some("synthetics".to_string())); + println!("{:?}", context.tags); + assert_eq!(context.tags.get("_dd.p.test").unwrap(), "value"); + assert_eq!(context.tags.get("_dd.p.tid").unwrap(), "4321"); + assert_eq!(context.tags.get("_dd.p.dm").unwrap(), "-3"); + } + + #[test] + fn test_extract_traceparent_propagator() { + let mut headers = HashMap::new(); + headers.insert( + "traceparent".to_string(), + "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string(), + ); + headers.insert( + "tracestate".to_string(), + "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string(), + ); + + let propagator = TraceparentPropagator; + let 
context = propagator.extract(&headers); + + assert_eq!(context.trace_id, 7277407061855694839); + assert_eq!(context.span_id, 67667974448284343); + assert_eq!(context.sampling.unwrap().priority, Some(2)); + assert_eq!(context.origin, Some("rum".to_string())); + assert_eq!( + context.tags.get("traceparent").unwrap(), + "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01" + ); + assert_eq!( + context.tags.get("tracestate").unwrap(), + "dd=p:00f067aa0ba902b7;s:2;o:rum" + ); + assert_eq!( + context.tags.get("_dd.p.tid").unwrap(), + "9291375655657946024" + ); + assert_eq!( + context.tags.get("_dd.parent_id").unwrap(), + "00f067aa0ba902b7" + ); + } +} From 865e62e78a60398d01f9a971c7eaac95b375ff17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Wed, 9 Oct 2024 14:59:24 -0400 Subject: [PATCH 07/41] feat(bottlecap): create Inferred Spans baseline + infer API Gateway HTTP spans (#405) * add `Trigger` trait for inferred spans * add `ApiGatewayHttpEvent` trigger * add `SpanInferrer` * make `invocation::processor` to use `SpanInferrer` * send `aws_config` to `invocation::processor` * use incoming payload for `invocation::processor` for span inferring * add `api_gateway_http_event.json` for testing * add `api_gateway_proxy_event.json` for testing * fix: Convert tag hashmap to sorted vector of tags * fix: fmt --------- Co-authored-by: AJ Stuyvenberg --- bottlecap/Cargo.lock | 1 + bottlecap/Cargo.toml | 1 + bottlecap/src/bin/bottlecap/main.rs | 13 +- bottlecap/src/config/mod.rs | 1 + bottlecap/src/lifecycle/invocation/mod.rs | 2 + .../src/lifecycle/invocation/processor.rs | 54 ++- .../src/lifecycle/invocation/span_inferrer.rs | 109 ++++++ .../triggers/api_gateway_http_event.rs | 317 ++++++++++++++++++ .../src/lifecycle/invocation/triggers/mod.rs | 51 +++ bottlecap/src/lifecycle/listener.rs | 33 +- .../payloads/api_gateway_http_event.json | 38 +++ .../payloads/api_gateway_proxy_event.json | 127 +++++++ 12 
files changed, 724 insertions(+), 23 deletions(-) create mode 100644 bottlecap/src/lifecycle/invocation/span_inferrer.rs create mode 100644 bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs create mode 100644 bottlecap/src/lifecycle/invocation/triggers/mod.rs create mode 100644 bottlecap/tests/payloads/api_gateway_http_event.json create mode 100644 bottlecap/tests/payloads/api_gateway_proxy_event.json diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index 12c93cb2d..f96fd4c7a 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -429,6 +429,7 @@ dependencies = [ "log", "proptest", "protobuf", + "rand", "regex", "reqwest", "rmp-serde", diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index a8ade97f4..09b3c0e13 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -37,6 +37,7 @@ hex = { version = "0.4", default-features = false, features = ["std"] } base64 = { version = "0.22", default-features = false } rmp-serde = { version = "1.3.0", default-features = false } rustls = { version = "0.23.12", default-features = false, features = ["aws-lc-rs"] } +rand = { version = "0.8", default-features = false } [dev-dependencies] figment = { version = "0.10", default-features = false, features = ["yaml", "env", "test"] } diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 8993e3307..0d7108677 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -264,11 +264,13 @@ async fn extension_loop_active( ) -> Result<()> { let mut event_bus = EventBus::run(); - let tags_provider = setup_tag_provider( - aws_config, - config, - r.account_id.as_ref().unwrap_or(&"none".to_string()), - ); + let account_id = r + .account_id + .as_ref() + .unwrap_or(&"none".to_string()) + .to_string(); + let tags_provider = setup_tag_provider(aws_config, config, &account_id); + let (logs_agent_channel, logs_flusher) = start_logs_agent( config, resolved_api_key.clone(), @@ 
-297,6 +299,7 @@ async fn extension_loop_active( let invocation_processor = Arc::new(TokioMutex::new(InvocationProcessor::new( Arc::clone(&tags_provider), Arc::clone(config), + aws_config, ))); let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { obfuscation_config: Arc::new( diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 3785843bb..43f7c2548 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -189,6 +189,7 @@ pub fn get_config(config_directory: &Path) -> Result { } #[allow(clippy::module_name_repetitions)] +#[derive(Debug, Clone)] pub struct AwsConfig { pub region: String, pub aws_access_key_id: String, diff --git a/bottlecap/src/lifecycle/invocation/mod.rs b/bottlecap/src/lifecycle/invocation/mod.rs index bf32ed105..39d0557dc 100644 --- a/bottlecap/src/lifecycle/invocation/mod.rs +++ b/bottlecap/src/lifecycle/invocation/mod.rs @@ -1,2 +1,4 @@ pub mod context; pub mod processor; +pub mod span_inferrer; +pub mod triggers; diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index e553150d1..33f94987a 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -11,20 +11,29 @@ use tokio::sync::mpsc::Sender; use tracing::debug; use crate::{ - config, lifecycle::invocation::context::ContextBuffer, tags::provider, traces::trace_processor, + config::{self, AwsConfig}, + lifecycle::invocation::{context::ContextBuffer, span_inferrer::SpanInferrer}, + tags::provider, + traces::trace_processor, }; pub const MS_TO_NS: f64 = 1_000_000.0; pub struct Processor { pub context_buffer: ContextBuffer, + inferrer: SpanInferrer, pub span: Span, + aws_config: AwsConfig, tracer_detected: bool, } impl Processor { #[must_use] - pub fn new(tags_provider: Arc, config: Arc) -> Self { + pub fn new( + tags_provider: Arc, + config: Arc, + aws_config: &AwsConfig, + ) -> Self { let service = 
config.service.clone().unwrap_or("aws.lambda".to_string()); let resource = tags_provider .get_canonical_resource_name() @@ -32,6 +41,7 @@ impl Processor { Processor { context_buffer: ContextBuffer::default(), + inferrer: SpanInferrer::default(), span: Span { service, name: "aws.lambda".to_string(), @@ -48,6 +58,7 @@ impl Processor { meta_struct: HashMap::new(), span_links: Vec::new(), }, + aws_config: aws_config.clone(), tracer_detected: false, } } @@ -86,7 +97,6 @@ impl Processor { span.duration = (context.runtime_duration_ms * MS_TO_NS).round() as i64; span.meta .insert("request_id".to_string(), request_id.clone()); - // todo(duncanista): add missing tags // - cold start, proactive init // - language @@ -99,8 +109,15 @@ impl Processor { // - metrics tags (for asm) } + self.inferrer.complete_inferred_span(&self.span); + if self.tracer_detected { - let span_size = std::mem::size_of_val(&self.span); + let mut body_size = std::mem::size_of_val(&self.span); + let mut traces = vec![self.span.clone()]; + if let Some(inferred_span) = self.inferrer.get_inferred_span() { + body_size += std::mem::size_of_val(inferred_span); + traces.push(inferred_span.clone()); + } // todo: figure out what to do here let header_tags = tracer_header_tags::TracerHeaderTags { @@ -118,8 +135,8 @@ impl Processor { config.clone(), tags_provider.clone(), header_tags, - vec![vec![self.span.clone()]], - span_size, + vec![traces], + body_size, ); if let Err(e) = trace_agent_tx.send(send_data).await { @@ -149,16 +166,31 @@ impl Processor { /// If this method is called, it means that we are operating in a Universally Instrumented /// runtime. Therefore, we need to set the `tracer_detected` flag to `true`. 
/// - pub fn on_invocation_start(&mut self) { + pub fn on_invocation_start(&mut self, payload: Vec) { self.tracer_detected = true; + + // Reset trace context + self.span.trace_id = 0; + self.span.parent_id = 0; + self.span.span_id = 0; + + self.inferrer.infer_span(&payload, &self.aws_config); + + if let Some(inferred_span) = self.inferrer.get_inferred_span() { + self.span.parent_id = inferred_span.span_id; + } } /// Given trace context information, set it to the current span. /// pub fn on_invocation_end(&mut self, trace_id: u64, span_id: u64, parent_id: u64) { - let span = &mut self.span; - span.trace_id = trace_id; - span.span_id = span_id; - span.parent_id = parent_id; + self.span.trace_id = trace_id; + self.span.span_id = span_id; + + if self.inferrer.get_inferred_span().is_some() { + self.inferrer.set_parent_id(parent_id); + } else { + self.span.parent_id = parent_id; + } } } diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs new file mode 100644 index 000000000..91bf3b722 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -0,0 +1,109 @@ +use datadog_trace_protobuf::pb::Span; +use log::debug; +use rand::Rng; +use serde_json::Value; + +use crate::config::AwsConfig; + +use crate::lifecycle::invocation::triggers::{ + api_gateway_http_event::APIGatewayHttpEvent, Trigger, +}; + +const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; +const FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG: &str = "function_trigger.event_source_arn"; + +pub struct SpanInferrer { + inferred_span: Option, + is_async_span: bool, +} + +impl Default for SpanInferrer { + fn default() -> Self { + Self::new() + } +} + +impl SpanInferrer { + #[must_use] + pub fn new() -> Self { + Self { + inferred_span: None, + is_async_span: false, + } + } + + /// Given a byte payload, try to deserialize it into a `serde_json::Value` + /// and try matching it to a `Trigger` implementation, which will 
create + /// an inferred span and set it to `self.inferred_span` + /// + pub fn infer_span(&mut self, payload: &[u8], aws_config: &AwsConfig) { + self.inferred_span = None; + if let Ok(payload_value) = serde_json::from_slice::(payload) { + if APIGatewayHttpEvent::is_match(&payload_value) { + if let Some(t) = APIGatewayHttpEvent::new(payload_value) { + let mut span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + + t.enrich_span(&mut span); + span.meta.extend([ + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "api_gateway".to_string(), + ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + t.get_arn(&aws_config.region), + ), + ]); + + self.is_async_span = t.is_async(); + self.inferred_span = Some(span); + } + } else { + debug!("Unable to infer span from payload"); + } + } else { + debug!("Unable to serialize payload"); + } + } + + /// If a `self.inferred_span` exist, set the `parent_id` to + /// the span. + /// + pub fn set_parent_id(&mut self, parent_id: u64) { + if let Some(s) = &mut self.inferred_span { + s.parent_id = parent_id; + } + } + + pub fn complete_inferred_span(&mut self, invocation_span: &Span) { + if let Some(s) = &mut self.inferred_span { + if self.is_async_span { + if s.duration != 0 { + let duration = invocation_span.start - s.start; + s.duration = duration; + } + } else { + let duration = (invocation_span.start + invocation_span.duration) - s.start; + s.duration = duration; + } + + s.trace_id = invocation_span.trace_id; + } + } + + fn generate_span_id() -> u64 { + // todo: secure random id with OsRng for SnapStart + let mut rng = rand::thread_rng(); + rng.gen() + } + + /// Returns a reference to the inner `self.inferred_span` + /// + #[must_use] + pub fn get_inferred_span(&self) -> &Option { + &self.inferred_span + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs new file mode 100644 index 
000000000..434d636af --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs @@ -0,0 +1,317 @@ +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{get_aws_partition_by_region, lowercase_key, Trigger}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct APIGatewayHttpEvent { + #[serde(rename = "routeKey")] + pub route_key: String, + #[serde(serialize_with = "lowercase_key")] + pub headers: HashMap, + #[serde(rename = "requestContext")] + pub request_context: RequestContext, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RequestContext { + pub stage: String, + #[serde(rename = "requestId")] + pub request_id: String, + #[serde(rename = "apiId")] + pub api_id: String, + #[serde(rename = "domainName")] + pub domain_name: String, + #[serde(rename = "timeEpoch")] + pub time_epoch: i64, + pub http: RequestContextHTTP, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RequestContextHTTP { + pub method: String, + pub path: String, + pub protocol: String, + #[serde(rename = "sourceIp")] + pub source_ip: String, + #[serde(rename = "userAgent")] + pub user_agent: String, +} + +impl Trigger for APIGatewayHttpEvent { + fn new(payload: Value) -> Option { + serde_json::from_value(payload).ok()? 
+ } + + fn is_match(payload: &Value) -> bool { + let version = payload.get("version"); + let domain_name: Option<&Value> = payload + .get("requestContext") + .and_then(|d| d.get("domainName")); + + version.is_some_and(|v| v == "2.0") + && payload.get("rawQueryString").is_some() + && domain_name.is_some_and(|d| d.as_str().map_or(true, |s| !s.contains("lambda-url"))) + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span) { + debug!("Enriching an Inferred Span for an API Gateway HTTP Event"); + let resource = format!( + "{http_method} {path}", + http_method = self.request_context.http.method, + path = self.request_context.http.path + ); + let http_url = format!( + "{domain_name}{path}", + domain_name = self.request_context.domain_name, + path = self.request_context.http.path + ); + let start_time = (self.request_context.time_epoch as f64 * MS_TO_NS) as i64; + // todo: service mapping + let service_name = self.request_context.domain_name.clone(); + + span.name = "aws.httpapi".to_string(); + span.service = service_name; + span.resource.clone_from(&resource); + span.r#type = "http".to_string(); + span.start = start_time; + span.meta.extend(HashMap::from([ + ( + "endpoint".to_string(), + self.request_context.http.path.clone(), + ), + ("http.url".to_string(), http_url), + ( + "http.method".to_string(), + self.request_context.http.method.clone(), + ), + ( + "http.protocol".to_string(), + self.request_context.http.protocol.clone(), + ), + ( + "http.source_ip".to_string(), + self.request_context.http.source_ip.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.http.user_agent.clone(), + ), + ("operation_name".to_string(), "aws.httpapi".to_string()), + ( + "request_id".to_string(), + self.request_context.request_id.clone(), + ), + ("resource_names".to_string(), resource), + ])); + + // todo: update global(? 
IsAsync if event payload is `Event` + } + + fn get_tags(&self) -> HashMap { + let mut tags = HashMap::from([ + ( + "http.url".to_string(), + self.request_context.domain_name.clone(), + ), + ( + "http_url_details.path".to_string(), + self.request_context.http.path.clone(), + ), + ( + "http.method".to_string(), + self.request_context.http.method.clone(), + ), + ]); + + if !self.route_key.is_empty() { + tags.insert("http.route".to_string(), self.route_key.clone()); + } + + if let Some(referer) = self.headers.get("referer") { + tags.insert("http.referer".to_string(), referer.to_string()); + } + + if let Some(user_agent) = self.headers.get("user-agent") { + tags.insert("http.user_agent".to_string(), user_agent.to_string()); + } + + tags + } + + fn get_arn(&self, region: &str) -> String { + let partition = get_aws_partition_by_region(region); + format!( + "arn:{partition}:apigateway:{region}::/restapis/{api_id}/stages/{stage}", + partition = partition, + region = region, + api_id = self.request_context.api_id, + stage = self.request_context.stage + ) + } + + fn is_async(&self) -> bool { + self.headers + .get("x-amz-invocation-type") + .is_some_and(|v| v == "Event") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = APIGatewayHttpEvent::new(payload) + .expect("Failed to deserialize into APIGatewayHttpEvent"); + + let expected = APIGatewayHttpEvent { + route_key: "GET /httpapi/get".to_string(), + headers: HashMap::from([ + ("accept".to_string(), "*/*".to_string()), + ("content-length".to_string(), "0".to_string()), + ( + "host".to_string(), + "x02yirxc7a.execute-api.sa-east-1.amazonaws.com".to_string(), + ), + ("user-agent".to_string(), "curl/7.64.1".to_string()), + ( + "x-amzn-trace-id".to_string(), + 
"Root=1-613a52fb-4c43cfc95e0241c1471bfa05".to_string(), + ), + ("x-forwarded-for".to_string(), "38.122.226.210".to_string()), + ("x-forwarded-port".to_string(), "443".to_string()), + ("x-forwarded-proto".to_string(), "https".to_string()), + ("x-datadog-trace-id".to_string(), "12345".to_string()), + ("x-datadog-parent-id".to_string(), "67890".to_string()), + ("x-datadog-sampling-priority".to_string(), "2".to_string()), + ]), + request_context: RequestContext { + stage: "$default".to_string(), + request_id: "FaHnXjKCGjQEJ7A=".to_string(), + api_id: "x02yirxc7a".to_string(), + domain_name: "x02yirxc7a.execute-api.sa-east-1.amazonaws.com".to_string(), + time_epoch: 1631212283738, + http: RequestContextHTTP { + method: "GET".to_string(), + path: "/httpapi/get".to_string(), + protocol: "HTTP/1.1".to_string(), + source_ip: "38.122.226.210".to_string(), + user_agent: "curl/7.64.1".to_string(), + }, + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize APIGatewayHttpEvent"); + + assert!(APIGatewayHttpEvent::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("api_gateway_proxy_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize APIGatewayHttpEvent"); + assert!(!APIGatewayHttpEvent::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.httpapi"); + assert_eq!( + span.service, + "x02yirxc7a.execute-api.sa-east-1.amazonaws.com" + ); + assert_eq!(span.resource, "GET /httpapi/get"); + 
assert_eq!(span.r#type, "http"); + assert_eq!( + span.meta, + HashMap::from([ + ("endpoint".to_string(), "/httpapi/get".to_string()), + ( + "http.url".to_string(), + "x02yirxc7a.execute-api.sa-east-1.amazonaws.com/httpapi/get".to_string() + ), + ("http.method".to_string(), "GET".to_string()), + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "38.122.226.210".to_string()), + ("http.user_agent".to_string(), "curl/7.64.1".to_string()), + ("operation_name".to_string(), "aws.httpapi".to_string()), + ("request_id".to_string(), "FaHnXjKCGjQEJ7A=".to_string()), + ("resource_names".to_string(), "GET /httpapi/get".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + let tags = event.get_tags(); + let sorted_tags_array = tags + .iter() + .map(|(k, v)| format!("{}:{}", k, v)) + .collect::>() + .sort(); + + let expected = HashMap::from([ + ( + "http.url".to_string(), + "x02yirxc7a.execute-api.sa-east-1.amazonaws.com".to_string(), + ), + ( + "http_url_details.path".to_string(), + "/httpapi/get".to_string(), + ), + ("http.method".to_string(), "GET".to_string()), + ("http.route".to_string(), "GET /httpapi/get".to_string()), + ("http.user_agent".to_string(), "curl/7.64.1".to_string()), + ("http.referer".to_string(), "".to_string()), + ]); + let expected_sorted_array = expected + .iter() + .map(|(k, v)| format!("{}:{}", k, v)) + .collect::>() + .sort(); + + assert_eq!(sorted_tags_array, expected_sorted_array); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize 
APIGatewayHttpEvent"); + assert_eq!( + event.get_arn("sa-east-1"), + "arn:aws:apigateway:sa-east-1::/restapis/x02yirxc7a/stages/$default" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs new file mode 100644 index 000000000..ec5860d27 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -0,0 +1,51 @@ +use std::{collections::HashMap, hash::BuildHasher}; + +use datadog_trace_protobuf::pb::Span; +use serde::{ser::SerializeMap, Serializer}; +use serde_json::Value; + +pub mod api_gateway_http_event; + +pub trait Trigger: Sized { + fn new(payload: Value) -> Option; + fn is_match(payload: &Value) -> bool; + fn enrich_span(&self, span: &mut Span); + fn get_tags(&self) -> HashMap; + fn get_arn(&self, region: &str) -> String; + fn is_async(&self) -> bool; +} + +#[must_use] +pub fn get_aws_partition_by_region(region: &str) -> String { + match region { + r if r.starts_with("us-gov-") => "aws-us-gov".to_string(), + r if r.starts_with("cn-") => "aws-cn".to_string(), + _ => "aws".to_string(), + } +} + +/// Serialize a `HashMap` with lowercase keys +/// +pub fn lowercase_key( + map: &HashMap, + serializer: S, +) -> Result +where + S: Serializer, + H: BuildHasher, +{ + let mut map_serializer = serializer.serialize_map(Some(map.len()))?; + for (key, value) in map { + map_serializer.serialize_entry(&key.to_lowercase(), value)?; + } + map_serializer.end() +} + +#[cfg(test)] +pub mod test_utils { + use std::fs; + + pub fn read_json_file(file_name: &str) -> String { + fs::read_to_string(format!("tests/payloads/{}", file_name)).expect("Failed to read file") + } +} diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs index ebb3dd833..e52ad9625 100644 --- a/bottlecap/src/lifecycle/listener.rs +++ b/bottlecap/src/lifecycle/listener.rs @@ -79,17 +79,36 @@ impl Listener { } async fn start_invocation_handler( - _: Request, + req: Request, invocation_processor: 
Arc>, ) -> http::Result> { debug!("Received start invocation request"); - let mut processor = invocation_processor.lock().await; - processor.on_invocation_start(); - drop(processor); + let (_, body) = req.into_parts(); + match hyper::body::to_bytes(body).await { + Ok(b) => { + let body = b.to_vec(); + let mut processor = invocation_processor.lock().await; + + processor.on_invocation_start(body); + + let mut response = Response::builder().status(200); + if processor.span.trace_id != 0 { + response = + response.header("x-datadog-trace-id", processor.span.trace_id.to_string()); + } - Response::builder() - .status(200) - .body(Body::from(json!({}).to_string())) + drop(processor); + + response.body(Body::from(json!({}).to_string())) + } + Err(e) => { + error!("Could not read start invocation request body {e}"); + + Response::builder() + .status(400) + .body(Body::from("Could not read start invocation request body")) + } + } } async fn end_invocation_handler( diff --git a/bottlecap/tests/payloads/api_gateway_http_event.json b/bottlecap/tests/payloads/api_gateway_http_event.json new file mode 100644 index 000000000..061a02522 --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_http_event.json @@ -0,0 +1,38 @@ +{ + "version": "2.0", + "routeKey": "GET /httpapi/get", + "rawPath": "/httpapi/get", + "rawQueryString": "", + "headers": { + "accept": "*/*", + "content-length": "0", + "host": "x02yirxc7a.execute-api.sa-east-1.amazonaws.com", + "user-agent": "curl/7.64.1", + "x-amzn-trace-id": "Root=1-613a52fb-4c43cfc95e0241c1471bfa05", + "x-forwarded-for": "38.122.226.210", + "x-forwarded-port": "443", + "x-forwarded-proto": "https", + "x-datadog-trace-id": "12345", + "x-datadog-parent-id": "67890", + "x-datadog-sampling-priority": "2" + }, + "requestContext": { + "accountId": "425362996713", + "apiId": "x02yirxc7a", + "domainName": "x02yirxc7a.execute-api.sa-east-1.amazonaws.com", + "domainPrefix": "x02yirxc7a", + "http": { + "method": "GET", + "path": "/httpapi/get", + 
"protocol": "HTTP/1.1", + "sourceIp": "38.122.226.210", + "userAgent": "curl/7.64.1" + }, + "requestId": "FaHnXjKCGjQEJ7A=", + "routeKey": "GET /httpapi/get", + "stage": "$default", + "time": "09/Sep/2021:18:31:23 +0000", + "timeEpoch": 1631212283738 + }, + "isBase64Encoded": false +} diff --git a/bottlecap/tests/payloads/api_gateway_proxy_event.json b/bottlecap/tests/payloads/api_gateway_proxy_event.json new file mode 100644 index 000000000..de1155eb1 --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_proxy_event.json @@ -0,0 +1,127 @@ +{ + "body": "eyJ0ZXN0IjoiYm9keSJ9", + "resource": "/{proxy+}", + "path": "/path/to/resource", + "httpMethod": "POST", + "isBase64Encoded": true, + "queryStringParameters": { + "foo": "bar" + }, + "multiValueQueryStringParameters": { + "foo": [ + "bar" + ] + }, + "pathParameters": { + "proxy": "/path/to/resource" + }, + "stageVariables": { + "baz": "qux" + }, + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Encoding": "gzip, deflate, sdch", + "Accept-Language": "en-US,en;q=0.8", + "Cache-Control": "max-age=0", + "CloudFront-Forwarded-Proto": "https", + "CloudFront-Is-Desktop-Viewer": "true", + "CloudFront-Is-Mobile-Viewer": "false", + "CloudFront-Is-SmartTV-Viewer": "false", + "CloudFront-Is-Tablet-Viewer": "false", + "CloudFront-Viewer-Country": "US", + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Custom User Agent String", + "Via": "1.1 08f323deadbeefa7af34d5feb414ce27.cloudfront.net (CloudFront)", + "X-Amz-Cf-Id": "cDehVQoZnx43VYQb9j2-nvCh-9z396Uhbp027Y2JvkCPNLmGJHqlaA==", + "X-Forwarded-For": "127.0.0.1, 127.0.0.2", + "X-Forwarded-Port": "443", + "X-Forwarded-Proto": "https", + "X-Datadog-Trace-Id": "12345", + "X-Datadog-Parent-Id": "67890", + "x-datadog-sampling-priority": "2" + }, + "multiValueHeaders": { + "Accept": [ + 
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" + ], + "Accept-Encoding": [ + "gzip, deflate, sdch" + ], + "Accept-Language": [ + "en-US,en;q=0.8" + ], + "Cache-Control": [ + "max-age=0" + ], + "CloudFront-Forwarded-Proto": [ + "https" + ], + "CloudFront-Is-Desktop-Viewer": [ + "true" + ], + "CloudFront-Is-Mobile-Viewer": [ + "false" + ], + "CloudFront-Is-SmartTV-Viewer": [ + "false" + ], + "CloudFront-Is-Tablet-Viewer": [ + "false" + ], + "CloudFront-Viewer-Country": [ + "US" + ], + "Host": [ + "0123456789.execute-api.us-east-1.amazonaws.com" + ], + "Upgrade-Insecure-Requests": [ + "1" + ], + "User-Agent": [ + "Custom User Agent String" + ], + "Via": [ + "1.1 08f323deadbeefa7af34d5feb414ce27.cloudfront.net (CloudFront)" + ], + "X-Amz-Cf-Id": [ + "cDehVQoZnx43VYQb9j2-nvCh-9z396Uhbp027Y2JvkCPNLmGJHqlaA==" + ], + "X-Forwarded-For": [ + "127.0.0.1, 127.0.0.2" + ], + "X-Forwarded-Port": [ + "443" + ], + "X-Forwarded-Proto": [ + "https" + ] + }, + "requestContext": { + "accountId": "123456789012", + "resourceId": "123456", + "stage": "prod", + "requestId": "c6af9ac6-7b61-11e6-9a41-93e8deadbeef", + "requestTime": "09/Apr/2015:12:34:56 +0000", + "requestTimeEpoch": 1428582896000, + "identity": { + "cognitoIdentityPoolId": null, + "accountId": null, + "cognitoIdentityId": null, + "caller": null, + "accessKey": null, + "sourceIp": "127.0.0.1", + "cognitoAuthenticationType": null, + "cognitoAuthenticationProvider": null, + "userArn": null, + "userAgent": "Custom User Agent String", + "user": null + }, + "domainName": "70ixmpl4fl.execute-api.us-east-2.amazonaws.com", + "path": "/prod/path/to/resource", + "resourcePath": "/{proxy+}", + "httpMethod": "POST", + "apiId": "1234567890", + "protocol": "HTTP/1.1" + } +} From 903dd8b694c68a2a52f6b2a07bc56d1b201fc1f7 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Mon, 21 Oct 2024 11:17:08 -0400 Subject: [PATCH 08/41] APIGW v1 + httpAPI/v1 API parameterized routes (#419) * feat: support APIGW v1 * feat: 
Tests for unparameterized payload working * feat: parameterized test * fix: specs * fix: unwrap_or_default, route has no http verb but is parameterized. * fix: lint * fix: Remove debugs, consolidate import * fix: oneline --- .../src/lifecycle/invocation/processor.rs | 11 +- .../src/lifecycle/invocation/span_inferrer.rs | 34 +- .../triggers/api_gateway_http_event.rs | 119 ++++-- .../triggers/api_gateway_rest_event.rs | 359 ++++++++++++++++++ .../src/lifecycle/invocation/triggers/mod.rs | 1 + bottlecap/src/lifecycle/listener.rs | 11 +- .../api_gateway_http_event_parameterized.json | 38 ++ .../payloads/api_gateway_rest_event.json | 80 ++++ .../api_gateway_rest_event_parameterized.json | 111 ++++++ 9 files changed, 731 insertions(+), 33 deletions(-) create mode 100644 bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs create mode 100644 bottlecap/tests/payloads/api_gateway_http_event_parameterized.json create mode 100644 bottlecap/tests/payloads/api_gateway_rest_event.json create mode 100644 bottlecap/tests/payloads/api_gateway_rest_event_parameterized.json diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 33f94987a..9b9cc98ef 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -183,12 +183,21 @@ impl Processor { /// Given trace context information, set it to the current span. 
/// - pub fn on_invocation_end(&mut self, trace_id: u64, span_id: u64, parent_id: u64) { + pub fn on_invocation_end( + &mut self, + trace_id: u64, + span_id: u64, + parent_id: u64, + status_code: Option, + ) { self.span.trace_id = trace_id; self.span.span_id = span_id; if self.inferrer.get_inferred_span().is_some() { self.inferrer.set_parent_id(parent_id); + if let Some(status_code) = status_code { + self.inferrer.set_status_code(status_code); + } } else { self.span.parent_id = parent_id; } diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 91bf3b722..7b2a0eefc 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -1,12 +1,13 @@ use datadog_trace_protobuf::pb::Span; -use log::debug; use rand::Rng; use serde_json::Value; +use tracing::debug; use crate::config::AwsConfig; use crate::lifecycle::invocation::triggers::{ - api_gateway_http_event::APIGatewayHttpEvent, Trigger, + api_gateway_http_event::APIGatewayHttpEvent, api_gateway_rest_event::APIGatewayRestEvent, + Trigger, }; const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; @@ -58,6 +59,28 @@ impl SpanInferrer { ), ]); + self.is_async_span = t.is_async(); + self.inferred_span = Some(span); + } + } else if APIGatewayRestEvent::is_match(&payload_value) { + if let Some(t) = APIGatewayRestEvent::new(payload_value) { + let mut span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + + t.enrich_span(&mut span); + span.meta.extend([ + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "api_gateway".to_string(), + ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + t.get_arn(&aws_config.region), + ), + ]); + self.is_async_span = t.is_async(); self.inferred_span = Some(span); } @@ -78,6 +101,13 @@ impl SpanInferrer { } } + pub fn set_status_code(&mut self, status_code: String) { + if let Some(s) = &mut 
self.inferred_span { + s.meta.insert("http.status_code".to_string(), status_code); + } + } + + // TODO add status tag and other info from response pub fn complete_inferred_span(&mut self, invocation_span: &Span) { if let Some(s) = &mut self.inferred_span { if self.is_async_span { diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs index 434d636af..effc3e3c8 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs @@ -63,13 +63,18 @@ impl Trigger for APIGatewayHttpEvent { #[allow(clippy::cast_possible_truncation)] fn enrich_span(&self, span: &mut Span) { debug!("Enriching an Inferred Span for an API Gateway HTTP Event"); - let resource = format!( - "{http_method} {path}", - http_method = self.request_context.http.method, - path = self.request_context.http.path - ); + let resource = if self.route_key.is_empty() { + format!( + "{http_method} {route_key}", + http_method = self.request_context.http.method, + route_key = self.route_key + ) + } else { + self.route_key.clone() + }; + let http_url = format!( - "{domain_name}{path}", + "https://{domain_name}{path}", domain_name = self.request_context.domain_name, path = self.request_context.http.path ); @@ -119,10 +124,16 @@ impl Trigger for APIGatewayHttpEvent { let mut tags = HashMap::from([ ( "http.url".to_string(), - self.request_context.domain_name.clone(), + format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name.clone(), + path = self.request_context.http.path.clone() + ), ), + // path and URL are full + // /users/12345/profile ( - "http_url_details.path".to_string(), + "http.url_details.path".to_string(), self.request_context.http.path.clone(), ), ( @@ -130,9 +141,18 @@ impl Trigger for APIGatewayHttpEvent { self.request_context.http.method.clone(), ), ]); - + // route is 
parameterized + // /users/{id}/profile if !self.route_key.is_empty() { - tags.insert("http.route".to_string(), self.route_key.clone()); + tags.insert( + "http.route".to_string(), + self.route_key + .clone() + .split_whitespace() + .last() + .unwrap_or(&self.route_key.clone()) + .to_string(), + ); } if let Some(referer) = self.headers.get("referer") { @@ -202,7 +222,7 @@ mod tests { request_id: "FaHnXjKCGjQEJ7A=".to_string(), api_id: "x02yirxc7a".to_string(), domain_name: "x02yirxc7a.execute-api.sa-east-1.amazonaws.com".to_string(), - time_epoch: 1631212283738, + time_epoch: 1_631_212_283_738, http: RequestContextHTTP { method: "GET".to_string(), path: "/httpapi/get".to_string(), @@ -254,7 +274,8 @@ mod tests { ("endpoint".to_string(), "/httpapi/get".to_string()), ( "http.url".to_string(), - "x02yirxc7a.execute-api.sa-east-1.amazonaws.com/httpapi/get".to_string() + "https://x02yirxc7a.execute-api.sa-east-1.amazonaws.com/httpapi/get" + .to_string() ), ("http.method".to_string(), "GET".to_string()), ("http.protocol".to_string(), "HTTP/1.1".to_string()), @@ -274,35 +295,77 @@ mod tests { let event = APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); let tags = event.get_tags(); - let sorted_tags_array = tags - .iter() - .map(|(k, v)| format!("{}:{}", k, v)) - .collect::>() - .sort(); - let expected = HashMap::from([ ( "http.url".to_string(), - "x02yirxc7a.execute-api.sa-east-1.amazonaws.com".to_string(), + "https://x02yirxc7a.execute-api.sa-east-1.amazonaws.com/httpapi/get".to_string(), ), ( - "http_url_details.path".to_string(), + "http.url_details.path".to_string(), "/httpapi/get".to_string(), ), ("http.method".to_string(), "GET".to_string()), - ("http.route".to_string(), "GET /httpapi/get".to_string()), + ("http.route".to_string(), "/httpapi/get".to_string()), ("http.user_agent".to_string(), "curl/7.64.1".to_string()), - ("http.referer".to_string(), "".to_string()), ]); - let expected_sorted_array = expected - .iter() - .map(|(k, 
v)| format!("{}:{}", k, v)) - .collect::>() - .sort(); - assert_eq!(sorted_tags_array, expected_sorted_array); + assert_eq!(tags, expected); + } + + #[test] + fn test_enrich_span_parameterized() { + let json = read_json_file("api_gateway_http_event_parameterized.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.httpapi"); + assert_eq!( + span.service, + "9vj54we5ih.execute-api.sa-east-1.amazonaws.com" + ); + assert_eq!(span.resource, "GET /user/{id}"); + assert_eq!(span.r#type, "http"); + assert_eq!( + span.meta, + HashMap::from([ + ("endpoint".to_string(), "/user/42".to_string()), + ( + "http.url".to_string(), + "https://9vj54we5ih.execute-api.sa-east-1.amazonaws.com/user/42".to_string() + ), + ("http.method".to_string(), "GET".to_string()), + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "76.115.124.192".to_string()), + ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ("operation_name".to_string(), "aws.httpapi".to_string()), + ("request_id".to_string(), "Ur2JtjEfGjQEPOg=".to_string()), + ("resource_names".to_string(), "GET /user/{id}".to_string()), + ]) + ); } + #[test] + fn test_get_tags_parameterized() { + let json = read_json_file("api_gateway_http_event_parameterized.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + let tags = event.get_tags(); + + let expected = HashMap::from([ + ( + "http.url".to_string(), + "https://9vj54we5ih.execute-api.sa-east-1.amazonaws.com/user/42".to_string(), + ), + ("http.url_details.path".to_string(), "/user/42".to_string()), + ("http.method".to_string(), "GET".to_string()), + 
("http.route".to_string(), "/user/{id}".to_string()), + ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ]); + assert_eq!(tags, expected); + } #[test] fn test_get_arn() { let json = read_json_file("api_gateway_http_event.json"); diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs new file mode 100644 index 000000000..7a737d576 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs @@ -0,0 +1,359 @@ +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{get_aws_partition_by_region, lowercase_key, Trigger}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct APIGatewayRestEvent { + #[serde(serialize_with = "lowercase_key")] + pub headers: HashMap, + #[serde(rename = "requestContext")] + pub request_context: RequestContext, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RequestContext { + pub stage: String, + #[serde(rename = "requestId")] + pub request_id: String, + #[serde(rename = "apiId")] + pub api_id: String, + #[serde(rename = "domainName")] + pub domain_name: String, + #[serde(rename = "requestTimeEpoch")] + pub time_epoch: i64, + #[serde(rename = "httpMethod")] + pub method: String, + #[serde(rename = "resourcePath")] + pub resource_path: String, + pub path: String, + pub protocol: String, + pub identity: Identity, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Identity { + #[serde(rename = "sourceIp")] + pub source_ip: String, + #[serde(rename = "userAgent")] + pub user_agent: String, +} + +impl Trigger for APIGatewayRestEvent { + fn new(payload: Value) -> Option { + match serde_json::from_value(payload) { + Ok(event) => Some(event), + Err(e) => { + 
debug!("Failed to deserialize APIGatewayRestEvent: {}", e); + None + } + } + } + + fn is_match(payload: &Value) -> bool { + let stage = payload.get("requestContext").and_then(|v| v.get("stage")); + let http_method = payload.get("httpMethod"); + let resource = payload.get("resource"); + stage.is_some() && http_method.is_some() && resource.is_some() + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span) { + debug!("Enriching an Inferred Span for an API Gateway REST Event"); + let resource = format!( + "{http_method} {path}", + http_method = self.request_context.method, + path = self.request_context.resource_path + ); + let http_url = format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name, + path = self.request_context.path + ); + let start_time = (self.request_context.time_epoch as f64 * MS_TO_NS) as i64; + // todo: service mapping + let service_name = self.request_context.domain_name.clone(); + + span.name = "aws.apigateway".to_string(); + span.service = service_name; + span.resource.clone_from(&resource); + span.r#type = "http".to_string(); + span.start = start_time; + span.meta.extend(HashMap::from([ + ("endpoint".to_string(), self.request_context.path.clone()), + ("http.url".to_string(), http_url), + ( + "http.method".to_string(), + self.request_context.method.clone(), + ), + ( + "http.protocol".to_string(), + self.request_context.protocol.clone(), + ), + ( + "http.source_ip".to_string(), + self.request_context.identity.source_ip.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.identity.user_agent.clone(), + ), + ("operation_name".to_string(), "aws.apigateway".to_string()), + ( + "request_id".to_string(), + self.request_context.request_id.clone(), + ), + ("resource_names".to_string(), resource.clone()), + ( + "http.route".to_string(), + self.request_context.resource_path.clone(), + ), + ])); + + debug!("Enriched Span: {:?}", span); + // todo: update global(? 
IsAsync if event payload is `Event` + } + + fn get_tags(&self) -> HashMap { + let mut tags = HashMap::from([ + ( + "http.url".to_string(), + format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name, + path = self.request_context.path + ), + ), + ( + "http.url_details.path".to_string(), + self.request_context.path.clone(), + ), + ( + "http.method".to_string(), + self.request_context.method.clone(), + ), + ( + "http.route".to_string(), + self.request_context.resource_path.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.identity.user_agent.to_string(), + ), + ]); + + if let Some(referer) = self.headers.get("referer") { + tags.insert("http.referer".to_string(), referer.to_string()); + } + + tags + } + + fn get_arn(&self, region: &str) -> String { + let partition = get_aws_partition_by_region(region); + format!( + "arn:{partition}:apigateway:{region}::/restapis/{api_id}/stages/{stage}", + partition = partition, + region = region, + api_id = self.request_context.api_id, + stage = self.request_context.stage + ) + } + + fn is_async(&self) -> bool { + self.headers + .get("x-amz-invocation-type") + .is_some_and(|v| v == "Event") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = APIGatewayRestEvent::new(payload) + .expect("Failed to deserialize into APIGatewayRestEvent"); + + let expected = APIGatewayRestEvent { + headers: HashMap::from([ + ("Header1".to_string(), "value1".to_string()), + ("Header2".to_string(), "value2".to_string()), + ]), + request_context: RequestContext { + stage: "$default".to_string(), + request_id: "id=".to_string(), + api_id: "id".to_string(), + domain_name: "id.execute-api.us-east-1.amazonaws.com".to_string(), + time_epoch: 
1_583_349_317_135, + method: "GET".to_string(), + path: "/my/path".to_string(), + protocol: "HTTP/1.1".to_string(), + resource_path: "/path".to_string(), + identity: Identity { + source_ip: "IP".to_string(), + user_agent: "user-agent".to_string(), + }, + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize APIGatewayRestEvent"); + + assert!(APIGatewayRestEvent::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize APIGatewayRestEvent"); + assert!(!APIGatewayRestEvent::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.apigateway"); + assert_eq!(span.service, "id.execute-api.us-east-1.amazonaws.com"); + assert_eq!(span.resource, "GET /path"); + assert_eq!(span.r#type, "http"); + + assert_eq!( + span.meta, + HashMap::from([ + ("endpoint".to_string(), "/my/path".to_string()), + ( + "http.url".to_string(), + "https://id.execute-api.us-east-1.amazonaws.com/my/path".to_string() + ), + ("http.method".to_string(), "GET".to_string()), + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "IP".to_string()), + ("http.user_agent".to_string(), "user-agent".to_string()), + ("http.route".to_string(), "/path".to_string()), + ("operation_name".to_string(), "aws.apigateway".to_string()), + ("request_id".to_string(), "id=".to_string()), + ("resource_names".to_string(), "GET 
/path".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + let tags = event.get_tags(); + + let expected = HashMap::from([ + ( + "http.url".to_string(), + "https://id.execute-api.us-east-1.amazonaws.com/my/path".to_string(), + ), + ("http.url_details.path".to_string(), "/my/path".to_string()), + ("http.method".to_string(), "GET".to_string()), + ("http.route".to_string(), "/path".to_string()), + ("http.user_agent".to_string(), "user-agent".to_string()), + ]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_enrich_parameterized_span() { + let json = read_json_file("api_gateway_rest_event_parameterized.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.apigateway"); + assert_eq!( + span.service, + "mcwkra0ya4.execute-api.sa-east-1.amazonaws.com" + ); + assert_eq!(span.resource, "GET /user/{id}"); + assert_eq!(span.r#type, "http"); + let expected = HashMap::from([ + ("endpoint".to_string(), "/dev/user/42".to_string()), + ( + "http.url".to_string(), + "https://mcwkra0ya4.execute-api.sa-east-1.amazonaws.com/dev/user/42".to_string(), + ), + ("http.method".to_string(), "GET".to_string()), + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "76.115.124.192".to_string()), + ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ("http.route".to_string(), "/user/{id}".to_string()), + ("operation_name".to_string(), "aws.apigateway".to_string()), + ( + "request_id".to_string(), + 
"e16399f7-e984-463a-9931-745ba021a27f".to_string(), + ), + ("resource_names".to_string(), "GET /user/{id}".to_string()), + ]); + assert_eq!(span.meta, expected); + } + + #[test] + fn test_get_tags_parameterized() { + let json = read_json_file("api_gateway_rest_event_parameterized.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + let tags = event.get_tags(); + + assert_eq!( + tags, + HashMap::from([ + ( + "http.url".to_string(), + "https://mcwkra0ya4.execute-api.sa-east-1.amazonaws.com/dev/user/42" + .to_string(), + ), + ( + "http.url_details.path".to_string(), + "/dev/user/42".to_string(), + ), + ("http.method".to_string(), "GET".to_string()), + ("http.route".to_string(), "/user/{id}".to_string()), + ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ]) + ); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:apigateway:us-east-1::/restapis/id/stages/$default" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index ec5860d27..5eb32ec6b 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -5,6 +5,7 @@ use serde::{ser::SerializeMap, Serializer}; use serde_json::Value; pub mod api_gateway_http_event; +pub mod api_gateway_rest_event; pub trait Trigger: Sized { fn new(payload: Value) -> Option; diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs index e52ad9625..4b11717f1 100644 --- a/bottlecap/src/lifecycle/listener.rs +++ 
b/bottlecap/src/lifecycle/listener.rs @@ -116,7 +116,14 @@ impl Listener { invocation_processor: Arc>, ) -> http::Result> { debug!("Received end invocation request"); - let (parts, _) = req.into_parts(); + let (parts, body) = req.into_parts(); + let parsed_body = serde_json::from_slice::( + &hyper::body::to_bytes(body).await.unwrap_or_default(), + ); + let mut parsed_status: Option = None; + if let Some(status_code) = parsed_body.unwrap_or_default().get("statusCode") { + parsed_status = Some(status_code.to_string()); + } let headers = parts.headers; let mut processor = invocation_processor.lock().await; @@ -142,7 +149,7 @@ impl Listener { } } - processor.on_invocation_end(trace_id, span_id, parent_id); + processor.on_invocation_end(trace_id, span_id, parent_id, parsed_status); drop(processor); Response::builder() diff --git a/bottlecap/tests/payloads/api_gateway_http_event_parameterized.json b/bottlecap/tests/payloads/api_gateway_http_event_parameterized.json new file mode 100644 index 000000000..89ff72b9c --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_http_event_parameterized.json @@ -0,0 +1,38 @@ +{ + "version": "2.0", + "routeKey": "GET /user/{id}", + "rawPath": "/user/42", + "rawQueryString": "", + "headers": { + "accept": "*/*", + "content-length": "0", + "host": "9vj54we5ih.execute-api.sa-east-1.amazonaws.com", + "user-agent": "curl/8.1.2", + "x-amzn-trace-id": "Root=1-65f49d71-505edb3b69b8abd513cfa08b", + "x-forwarded-for": "76.115.124.192", + "x-forwarded-port": "443", + "x-forwarded-proto": "https" + }, + "requestContext": { + "accountId": "425362996713", + "apiId": "9vj54we5ih", + "domainName": "9vj54we5ih.execute-api.sa-east-1.amazonaws.com", + "domainPrefix": "9vj54we5ih", + "http": { + "method": "GET", + "path": "/user/42", + "protocol": "HTTP/1.1", + "sourceIp": "76.115.124.192", + "userAgent": "curl/8.1.2" + }, + "requestId": "Ur2JtjEfGjQEPOg=", + "routeKey": "GET /user/{id}", + "stage": "$default", + "time": "15/Mar/2024:19:11:45 +0000", + 
"timeEpoch": 1710529905066 + }, + "pathParameters": { + "id": "42" + }, + "isBase64Encoded": false +} diff --git a/bottlecap/tests/payloads/api_gateway_rest_event.json b/bottlecap/tests/payloads/api_gateway_rest_event.json new file mode 100644 index 000000000..df9c5bb88 --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_rest_event.json @@ -0,0 +1,80 @@ +{ + "version": "1.0", + "resource": "/my/path", + "path": "/my/path", + "httpMethod": "GET", + "headers": { + "Header1": "value1", + "Header2": "value2" + }, + "multiValueHeaders": { + "Header1": [ + "value1" + ], + "Header2": [ + "value1", + "value2" + ] + }, + "queryStringParameters": { + "parameter1": "value1", + "parameter2": "value" + }, + "multiValueQueryStringParameters": { + "parameter1": [ + "value1", + "value2" + ], + "parameter2": [ + "value" + ] + }, + "requestContext": { + "accountId": "123456789012", + "apiId": "id", + "authorizer": { + "claims": null, + "scopes": null + }, + "domainName": "id.execute-api.us-east-1.amazonaws.com", + "domainPrefix": "id", + "extendedRequestId": "request-id", + "httpMethod": "GET", + "identity": { + "accessKey": null, + "accountId": null, + "caller": null, + "cognitoAuthenticationProvider": null, + "cognitoAuthenticationType": null, + "cognitoIdentityId": null, + "cognitoIdentityPoolId": null, + "principalOrgId": null, + "sourceIp": "IP", + "user": null, + "userAgent": "user-agent", + "userArn": null, + "clientCert": { + "clientCertPem": "CERT_CONTENT", + "subjectDN": "www.example.com", + "issuerDN": "Example issuer", + "serialNumber": "a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1", + "validity": { + "notBefore": "May 28 12:30:02 2019 GMT", + "notAfter": "Aug 5 09:36:04 2021 GMT" + } + } + }, + "path": "/my/path", + "protocol": "HTTP/1.1", + "requestId": "id=", + "requestTime": "04/Mar/2020:19:15:17 +0000", + "requestTimeEpoch": 1583349317135, + "resourceId": null, + "resourcePath": "/path", + "stage": "$default" + }, + "pathParameters": null, + 
"stageVariables": null, + "body": "Hello from Lambda!", + "isBase64Encoded": false +} diff --git a/bottlecap/tests/payloads/api_gateway_rest_event_parameterized.json b/bottlecap/tests/payloads/api_gateway_rest_event_parameterized.json new file mode 100644 index 000000000..65527ccb6 --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_rest_event_parameterized.json @@ -0,0 +1,111 @@ +{ + "resource": "/user/{id}", + "path": "/user/42", + "httpMethod": "GET", + "headers": { + "Accept": "*/*", + "CloudFront-Forwarded-Proto": "https", + "CloudFront-Is-Desktop-Viewer": "true", + "CloudFront-Is-Mobile-Viewer": "false", + "CloudFront-Is-SmartTV-Viewer": "false", + "CloudFront-Is-Tablet-Viewer": "false", + "CloudFront-Viewer-ASN": "7922", + "CloudFront-Viewer-Country": "US", + "Host": "mcwkra0ya4.execute-api.sa-east-1.amazonaws.com", + "User-Agent": "curl/8.1.2", + "Via": "2.0 xxx.cloudfront.net (CloudFront)", + "X-Amz-Cf-Id": "Tz3yUVcJkwOhQGqZgKTzrEHqAoOd8ZprYAHpg2S6BNxdd-Ym79pb6g==", + "X-Amzn-Trace-Id": "Root=1-65f49d20-7ba106216238dd0078a5db31", + "X-Forwarded-For": "76.115.124.192, 15.158.54.119", + "X-Forwarded-Port": "443", + "X-Forwarded-Proto": "https" + }, + "multiValueHeaders": { + "Accept": [ + "*/*" + ], + "CloudFront-Forwarded-Proto": [ + "https" + ], + "CloudFront-Is-Desktop-Viewer": [ + "true" + ], + "CloudFront-Is-Mobile-Viewer": [ + "false" + ], + "CloudFront-Is-SmartTV-Viewer": [ + "false" + ], + "CloudFront-Is-Tablet-Viewer": [ + "false" + ], + "CloudFront-Viewer-ASN": [ + "7922" + ], + "CloudFront-Viewer-Country": [ + "US" + ], + "Host": [ + "mcwkra0ya4.execute-api.sa-east-1.amazonaws.com" + ], + "User-Agent": [ + "curl/8.1.2" + ], + "Via": [ + "2.0 xxx.cloudfront.net (CloudFront)" + ], + "X-Amz-Cf-Id": [ + "Tz3yUVcJkwOhQGqZgKTzrEHqAoOd8ZprYAHpg2S6BNxdd-Ym79pb6g==" + ], + "X-Amzn-Trace-Id": [ + "Root=1-65f49d20-7ba106216238dd0078a5db31" + ], + "X-Forwarded-For": [ + "76.115.124.192, 15.158.54.119" + ], + "X-Forwarded-Port": [ + "443" + ], + 
"X-Forwarded-Proto": [ + "https" + ] + }, + "queryStringParameters": null, + "multiValueQueryStringParameters": null, + "pathParameters": { + "id": "42" + }, + "stageVariables": null, + "requestContext": { + "resourceId": "ojg3nk", + "resourcePath": "/user/{id}", + "httpMethod": "GET", + "extendedRequestId": "Ur19IHYDmjQEU5A=", + "requestTime": "15/Mar/2024:19:10:24 +0000", + "path": "/dev/user/42", + "accountId": "425362996713", + "protocol": "HTTP/1.1", + "stage": "dev", + "domainPrefix": "mcwkra0ya4", + "requestTimeEpoch": 1710529824520, + "requestId": "e16399f7-e984-463a-9931-745ba021a27f", + "identity": { + "cognitoIdentityPoolId": null, + "accountId": null, + "cognitoIdentityId": null, + "caller": null, + "sourceIp": "76.115.124.192", + "principalOrgId": null, + "accessKey": null, + "cognitoAuthenticationType": null, + "cognitoAuthenticationProvider": null, + "userArn": null, + "userAgent": "curl/8.1.2", + "user": null + }, + "domainName": "mcwkra0ya4.execute-api.sa-east-1.amazonaws.com", + "apiId": "mcwkra0ya4" + }, + "body": null, + "isBase64Encoded": false +} From b0f556a8229da22091e81ec390db6bdd103724a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Mon, 21 Oct 2024 13:58:06 -0400 Subject: [PATCH 09/41] feat(bottlecap): Add Composite Trace Propagator (#413) * add `trace_propagation_style.rs` * add Trace Propagation to `config.rs` also updated unit tests, as we have custom behavior, we should check only the fields we care about in the tests * add `links` to `SpanContext` * add composite propagator also known as our internal http propagator, but in reality, http doesnt make any sense to me, its just a composite propagator which we used based on our configuration * update `TextMapPropagator`s to comply with interface also updated the naming * fmt * add unit testing for `config.rs` * add `PartialEq` to `SpanContext` * correct logic from `text_map_propagator.rs` logic was wrong in some 
parts, this was discovered through unit tests * add unit tests for `DatadogCompositePropagator` also corrected some logic --- bottlecap/src/config/mod.rs | 191 ++-- .../src/config/trace_propagation_style.rs | 58 ++ bottlecap/src/traces/context.rs | 7 +- bottlecap/src/traces/propagation/mod.rs | 870 ++++++++++++++++++ .../traces/propagation/text_map_propagator.rs | 177 ++-- 5 files changed, 1134 insertions(+), 169 deletions(-) create mode 100644 bottlecap/src/config/trace_propagation_style.rs diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 43f7c2548..597e954f3 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -1,12 +1,15 @@ pub mod flush_strategy; pub mod log_level; pub mod processing_rule; +pub mod trace_propagation_style; use std::path::Path; +use std::vec; use figment::providers::{Format, Yaml}; use figment::{providers::Env, Figment}; use serde::Deserialize; +use trace_propagation_style::{deserialize_trace_propagation_style, TracePropagationStyle}; use crate::config::flush_strategy::FlushStrategy; use crate::config::log_level::{deserialize_log_level, LogLevel}; @@ -62,6 +65,13 @@ pub struct Config { pub serverless_flush_strategy: FlushStrategy, pub enhanced_metrics: bool, pub https_proxy: Option, + // Trace Propagation + #[serde(deserialize_with = "deserialize_trace_propagation_style")] + pub trace_propagation_style: Vec, + #[serde(deserialize_with = "deserialize_trace_propagation_style")] + pub trace_propagation_style_extract: Vec, + pub trace_propagation_extract_first: bool, + pub trace_propagation_http_baggage_enabled: bool, } impl Default for Config { @@ -85,6 +95,14 @@ impl Default for Config { enhanced_metrics: true, // Failover https_proxy: None, + // Trace Propagation + trace_propagation_style: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ], + trace_propagation_style_extract: vec![], + trace_propagation_extract_first: false, + 
trace_propagation_http_baggage_enabled: false, } } } @@ -185,6 +203,15 @@ pub fn get_config(config_directory: &Path) -> Result { } } + // Trace Propagation + // + // If not set by the user, set defaults + if config.trace_propagation_style_extract.is_empty() { + config + .trace_propagation_style_extract + .clone_from(&config.trace_propagation_style); + } + Ok(config) } @@ -247,13 +274,7 @@ pub mod tests { )?; jail.set_env("DD_SITE", "datad0g.com"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: "datad0g.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.site, "datad0g.com"); Ok(()) }); } @@ -269,13 +290,7 @@ pub mod tests { ", )?; let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: "datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.site, "datadoghq.com"); Ok(()) }); } @@ -287,13 +302,7 @@ pub mod tests { jail.set_env("DD_SITE", "datadoghq.eu"); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: "datadoghq.eu".to_string(), - ..Config::default() - } - ); + assert_eq!(config.site, "datadoghq.eu"); Ok(()) }); } @@ -305,14 +314,7 @@ pub mod tests { jail.set_env("DD_LOG_LEVEL", "TRACE"); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - log_level: LogLevel::Trace, - site: "datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.log_level, LogLevel::Trace); Ok(()) }); } @@ -327,6 +329,10 @@ pub mod tests { config, Config { site: "datadoghq.com".to_string(), + trace_propagation_style_extract: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext + ], ..Config::default() } ); @@ -341,14 +347,7 @@ pub mod tests { 
jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "end"); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - serverless_flush_strategy: FlushStrategy::End, - site: "datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.serverless_flush_strategy, FlushStrategy::End); Ok(()) }); } @@ -361,14 +360,8 @@ pub mod tests { jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); assert_eq!( - config, - Config { - serverless_flush_strategy: FlushStrategy::Periodically(PeriodicStrategy { - interval: 100_000 - }), - site: "datadoghq.com".to_string(), - ..Config::default() - } + config.serverless_flush_strategy, + FlushStrategy::Periodically(PeriodicStrategy { interval: 100_000 }) ); Ok(()) }); @@ -381,13 +374,7 @@ pub mod tests { jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "invalid_strategy"); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: "datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); Ok(()) }); } @@ -402,13 +389,7 @@ pub mod tests { ); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: "datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); Ok(()) }); } @@ -435,17 +416,13 @@ pub mod tests { jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); assert_eq!( - config, - Config { - logs_config_processing_rules: Some(vec![ProcessingRule { - kind: processing_rule::Kind::ExcludeAtMatch, - name: "exclude".to_string(), - pattern: "exclude".to_string(), - 
replace_placeholder: None - }]), - site: "datadoghq.com".to_string(), - ..Config::default() - } + config.logs_config_processing_rules, + Some(vec![ProcessingRule { + kind: processing_rule::Kind::ExcludeAtMatch, + name: "exclude".to_string(), + pattern: "exclude".to_string(), + replace_placeholder: None + }]) ); Ok(()) }); @@ -469,39 +446,75 @@ pub mod tests { )?; let config = get_config(Path::new("")).expect("should parse config"); assert_eq!( - config, - Config { - logs_config_processing_rules: Some(vec![ProcessingRule { - kind: processing_rule::Kind::ExcludeAtMatch, - name: "exclude".to_string(), - pattern: "exclude".to_string(), - replace_placeholder: None - }]), - site: "datadoghq.com".to_string(), - ..Config::default() - } + config.logs_config_processing_rules, + Some(vec![ProcessingRule { + kind: processing_rule::Kind::ExcludeAtMatch, + name: "exclude".to_string(), + pattern: "exclude".to_string(), + replace_placeholder: None + }]), ); Ok(()) }); } #[test] - fn test_ignore_apm_replace_tags() { + fn test_parse_trace_propagation_style() { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env( - "DD_APM_REPLACE_TAGS", - r#"[{"name":"resource.name","pattern":"(.*)/(foo[:%].+)","repl":"$1/{foo}"}]"#, + "DD_TRACE_PROPAGATION_STYLE", + "datadog,tracecontext,b3,b3multi", ); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); + + let expected_styles = vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + TracePropagationStyle::B3, + TracePropagationStyle::B3Multi, + ]; + assert_eq!(config.trace_propagation_style, expected_styles); + assert_eq!(config.trace_propagation_style_extract, expected_styles); + Ok(()) + }); + } + + #[test] + fn test_parse_trace_propagation_style_extract() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_TRACE_PROPAGATION_STYLE_EXTRACT", "datadog"); + jail.set_env("DD_EXTENSION_VERSION", "next"); + let config = 
get_config(Path::new("")).expect("should parse config"); + assert_eq!( - config, - Config { - site: "datadoghq.com".to_string(), - ..Config::default() - } + config.trace_propagation_style, + vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ] + ); + assert_eq!( + config.trace_propagation_style_extract, + vec![TracePropagationStyle::Datadog] + ); + Ok(()) + }); + } + + #[test] + fn test_ignore_apm_replace_tags() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env( + "DD_APM_REPLACE_TAGS", + r#"[{"name":"resource.name","pattern":"(.*)/(foo[:%].+)","repl":"$1/{foo}"}]"#, ); + jail.set_env("DD_EXTENSION_VERSION", "next"); + let config = get_config(Path::new("")); + assert!(config.is_ok()); Ok(()) }); } diff --git a/bottlecap/src/config/trace_propagation_style.rs b/bottlecap/src/config/trace_propagation_style.rs new file mode 100644 index 000000000..6ebc9dc74 --- /dev/null +++ b/bottlecap/src/config/trace_propagation_style.rs @@ -0,0 +1,58 @@ +use std::{fmt::Display, str::FromStr}; + +use serde::{Deserialize, Deserializer}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TracePropagationStyle { + Datadog, + B3Multi, + B3, + TraceContext, + None, +} + +impl FromStr for TracePropagationStyle { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "datadog" => Ok(TracePropagationStyle::Datadog), + "b3multi" => Ok(TracePropagationStyle::B3Multi), + "b3" => Ok(TracePropagationStyle::B3), + "tracecontext" => Ok(TracePropagationStyle::TraceContext), + "none" => Ok(TracePropagationStyle::None), + _ => Err(format!("Unknown trace propagation style: {s}")), + } + } +} + +impl Display for TracePropagationStyle { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let style = match self { + TracePropagationStyle::Datadog => "datadog", + TracePropagationStyle::B3Multi => "b3multi", + TracePropagationStyle::B3 => "b3", + TracePropagationStyle::TraceContext => 
"tracecontext", + TracePropagationStyle::None => "none", + }; + write!(f, "{style}") + } +} + +#[allow(clippy::module_name_repetitions)] +pub fn deserialize_trace_propagation_style<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = String::deserialize(deserializer)?; + + s.split(',') + .map(|style| { + TracePropagationStyle::from_str(style.trim()).map_err(|e| { + serde::de::Error::custom(format!("Failed to deserialize propagation style: {e}")) + }) + }) + .collect() +} diff --git a/bottlecap/src/traces/context.rs b/bottlecap/src/traces/context.rs index 35e9921ee..600ae1c39 100644 --- a/bottlecap/src/traces/context.rs +++ b/bottlecap/src/traces/context.rs @@ -1,12 +1,14 @@ use std::collections::HashMap; -#[derive(Copy, Clone, Default, Debug)] +use datadog_trace_protobuf::pb::SpanLink; + +#[derive(Copy, Clone, Default, Debug, PartialEq)] pub struct Sampling { pub priority: Option, pub mechanism: Option, } -#[derive(Clone, Default, Debug)] +#[derive(Clone, Default, Debug, PartialEq)] #[allow(clippy::module_name_repetitions)] pub struct SpanContext { pub trace_id: u64, @@ -14,4 +16,5 @@ pub struct SpanContext { pub sampling: Option, pub origin: Option, pub tags: HashMap, + pub links: Vec, } diff --git a/bottlecap/src/traces/propagation/mod.rs b/bottlecap/src/traces/propagation/mod.rs index ef268ef61..e93d81329 100644 --- a/bottlecap/src/traces/propagation/mod.rs +++ b/bottlecap/src/traces/propagation/mod.rs @@ -1,3 +1,873 @@ +use std::{collections::HashMap, sync::Arc}; + +use crate::{ + config::{self, trace_propagation_style::TracePropagationStyle}, + traces::context::SpanContext, +}; +use carrier::{Extractor, Injector}; +use datadog_trace_protobuf::pb::SpanLink; +use text_map_propagator::{ + BAGGAGE_PREFIX, DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY, DATADOG_LAST_PARENT_ID_KEY, + TRACESTATE_KEY, +}; + pub mod carrier; pub mod error; pub mod text_map_propagator; + +pub trait Propagator { + fn extract(&self, carrier: &dyn 
Extractor) -> Option; + fn inject(&self, context: SpanContext, carrier: &mut dyn Injector); +} + +pub struct DatadogCompositePropagator { + propagators: Vec>, + config: Arc, +} + +#[allow(clippy::never_loop)] +impl Propagator for DatadogCompositePropagator { + fn extract(&self, carrier: &dyn Extractor) -> Option { + if self.config.trace_propagation_extract_first { + for propagator in &self.propagators { + let context = propagator.extract(carrier); + + if self.config.trace_propagation_http_baggage_enabled { + if let Some(mut context) = context { + Self::attach_baggage(&mut context, carrier); + return Some(context); + } + } + + return context; + } + } + + let (contexts, styles) = self.extract_available_contexts(carrier); + if contexts.is_empty() { + return None; + } + + let mut context = Self::resolve_contexts(contexts, styles, carrier); + if self.config.trace_propagation_http_baggage_enabled { + Self::attach_baggage(&mut context, carrier); + } + + Some(context) + } + + fn inject(&self, _context: SpanContext, _carrier: &mut dyn Injector) { + todo!() + } +} + +impl DatadogCompositePropagator { + #[must_use] + pub fn new(config: Arc) -> Self { + let propagators: Vec> = config + .trace_propagation_style_extract + .iter() + .filter_map(|style| match style { + TracePropagationStyle::Datadog => { + Some(Box::new(text_map_propagator::DatadogHeaderPropagator) + as Box) + } + TracePropagationStyle::TraceContext => { + Some(Box::new(text_map_propagator::TraceContextPropagator) + as Box) + } + _ => None, + }) + .collect(); + + Self { + propagators, + config, + } + } + + fn extract_available_contexts( + &self, + carrier: &dyn Extractor, + ) -> (Vec, Vec) { + let mut contexts = Vec::::new(); + let mut styles = Vec::::new(); + + for (i, propagator) in self.propagators.iter().enumerate() { + if let Some(context) = propagator.extract(carrier) { + contexts.push(context); + styles.push(self.config.trace_propagation_style_extract[i]); + } + } + + (contexts, styles) + } + + fn 
resolve_contexts( + contexts: Vec, + styles: Vec, + _carrier: &dyn Extractor, + ) -> SpanContext { + let mut primary_context = contexts[0].clone(); + let mut links = Vec::::new(); + + let mut i = 1; + for context in contexts.iter().skip(1) { + let style = styles[i]; + + if context.span_id != 0 + && context.trace_id != 0 + && context.trace_id != primary_context.trace_id + { + let sampling = context.sampling.unwrap_or_default().priority.unwrap_or(0); + let tracestate: Option = match style { + TracePropagationStyle::TraceContext => { + context.tags.get(TRACESTATE_KEY).cloned() + } + _ => None, + }; + let attributes = HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), style.to_string()), + ]); + let trace_id_high_str = context + .tags + .get(DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY) + .cloned() + .unwrap_or_default(); + let trace_ig_high = u64::from_str_radix(&trace_id_high_str, 16).unwrap_or_default(); + + links.push(SpanLink { + trace_id: context.trace_id, + trace_id_high: trace_ig_high, + span_id: context.span_id, + flags: u32::from(sampling > 0), + tracestate: tracestate.unwrap_or_default(), + attributes, + }); + } else if style == TracePropagationStyle::TraceContext { + if let Some(tracestate) = context.tags.get(TRACESTATE_KEY) { + primary_context + .tags + .insert(TRACESTATE_KEY.to_string(), tracestate.clone()); + } + + if primary_context.trace_id == context.trace_id + && primary_context.span_id != context.span_id + { + let mut dd_context: Option = None; + if styles.contains(&TracePropagationStyle::Datadog) { + let position = styles + .iter() + .position(|&s| s == TracePropagationStyle::Datadog) + .unwrap_or_default(); + dd_context = contexts.get(position).cloned(); + } + + if let Some(parent_id) = context.tags.get(DATADOG_LAST_PARENT_ID_KEY) { + primary_context + .tags + .insert(DATADOG_LAST_PARENT_ID_KEY.to_string(), parent_id.clone()); + } else if let Some(sc) = dd_context { + 
primary_context.tags.insert( + DATADOG_LAST_PARENT_ID_KEY.to_string(), + format!("{:016x}", sc.span_id), + ); + } + + primary_context.span_id = context.span_id; + } + } + + i += 1; + } + + primary_context.links = links; + + primary_context + } + + fn attach_baggage(context: &mut SpanContext, carrier: &dyn Extractor) { + let keys = carrier.keys(); + + for key in keys { + if let Some(stripped) = key.strip_prefix(BAGGAGE_PREFIX) { + context.tags.insert( + stripped.to_string(), + carrier.get(key).unwrap_or_default().to_string(), + ); + } + } + } +} + +#[cfg(test)] +pub mod tests { + use std::vec; + + use lazy_static::lazy_static; + + use crate::traces::context::Sampling; + + use super::*; + + lazy_static! { + static ref TRACE_ID: u128 = 171395628812617415352188477958425669623; + static ref TRACE_ID_LOWER_ORDER_BITS: u64 = *TRACE_ID as u64; + static ref TRACE_ID_HEX: String = String::from("80f198ee56343ba864fe8b2a57d3eff7"); + + // TraceContext Headers + static ref VALID_TRACECONTEXT_HEADERS_BASIC: HashMap = HashMap::from([ + ( + "traceparent".to_string(), + format!("00-{}-00f067aa0ba902b7-01", *TRACE_ID_HEX) + ), + ( + "tracestate".to_string(), + "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string() + ), + ]); + static ref VALID_TRACECONTEXT_HEADERS_RUM_NO_SAMPLING_DECISION: HashMap = + HashMap::from([ + ( + "traceparent".to_string(), + format!("00-{}-00f067aa0ba902b7-00", *TRACE_ID_HEX) + ), + ( + "tracestate".to_string(), + "dd=o:rum".to_string() + ), + ]); + static ref VALID_TRACECONTEXT_HEADERS: HashMap = HashMap::from([ + ( + "traceparent".to_string(), + format!("00-{}-00f067aa0ba902b7-01", *TRACE_ID_HEX) + ), + ( + "tracestate".to_string(), + "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string() + ), + ]); + static ref VALID_TRACECONTEXT_HEADERS_VALID_64_BIT_TRACE_ID: HashMap = + HashMap::from([ + ( + "traceparent".to_string(), + "00-000000000000000064fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string() + ), + ( + "tracestate".to_string(), + 
"dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMzE".to_string() + ), + ]); + + // Datadog Headers + static ref VALID_DATADOG_HEADERS: HashMap = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "13088165645273925489".to_string(), + ), + ("x-datadog-parent-id".to_string(), "5678".to_string(),), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ]); + static ref VALID_DATADOG_HEADERS_NO_PRIORITY: HashMap = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "13088165645273925489".to_string(), + ), + ("x-datadog-parent-id".to_string(), "5678".to_string(),), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ]); + static ref VALID_DATADOG_HEADERS_MATCHING_TRACE_CONTEXT_VALID_TRACE_ID: HashMap = + HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + TRACE_ID_LOWER_ORDER_BITS.to_string() + ), + ("x-datadog-parent-id".to_string(), "5678".to_string()), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + static ref INVALID_DATADOG_HEADERS: HashMap = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "13088165645273925489".to_string(), + ), + ("x-datadog-parent-id".to_string(), "parent_id".to_string(),), + ("x-datadog-sampling-priority".to_string(), "sample".to_string()), + ]); + + // Fixtures + // + static ref ALL_VALID_HEADERS: HashMap = { + let mut h = HashMap::new(); + h.extend(VALID_DATADOG_HEADERS.clone()); + h.extend(VALID_TRACECONTEXT_HEADERS.clone()); + // todo: add b3 + h + }; + static ref DATADOG_TRACECONTEXT_MATCHING_TRACE_ID_HEADERS: HashMap = { + let mut h = HashMap::new(); + h.extend(VALID_DATADOG_HEADERS_MATCHING_TRACE_CONTEXT_VALID_TRACE_ID.clone()); + // We use 64-bit traceparent trace id value here so it can match for + // both 128-bit enabled and disabled + h.extend(VALID_TRACECONTEXT_HEADERS_VALID_64_BIT_TRACE_ID.clone()); + h + }; + // Edge cases + static 
ref ALL_HEADERS_CHAOTIC_1: HashMap = { + let mut h = HashMap::new(); + h.extend(VALID_DATADOG_HEADERS_MATCHING_TRACE_CONTEXT_VALID_TRACE_ID.clone()); + h.extend(VALID_TRACECONTEXT_HEADERS_VALID_64_BIT_TRACE_ID.clone()); + // todo: add b3 + h + }; + static ref ALL_HEADERS_CHAOTIC_2: HashMap = { + let mut h = HashMap::new(); + h.extend(VALID_DATADOG_HEADERS.clone()); + h.extend(VALID_TRACECONTEXT_HEADERS_VALID_64_BIT_TRACE_ID.clone()); + // todo: add b3 + h + }; + static ref NO_TRACESTATE_SUPPORT_NOT_MATCHING_TRACE_ID: HashMap = { + let mut h = HashMap::new(); + h.extend(VALID_DATADOG_HEADERS.clone()); + h.extend(VALID_TRACECONTEXT_HEADERS_RUM_NO_SAMPLING_DECISION.clone()); + h + }; + } + + macro_rules! test_propagation_extract { + ($($name:ident: $value:expr,)*) => { + $( + #[test] + fn $name() { + let (styles, carrier, expected) = $value; + let mut config = config::Config::default(); + config.trace_propagation_style_extract = vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]; + if let Some(s) = styles { + config.trace_propagation_style_extract.clone_from(&s); + } + + let propagator = DatadogCompositePropagator::new(Arc::new(config)); + + let context = propagator.extract(&carrier).unwrap_or_default(); + + assert_eq!(context, expected); + } + )* + } + } + + test_propagation_extract! 
{ + // Datadog Headers + valid_datadog_default: ( + None, + VALID_DATADOG_HEADERS.clone(), + SpanContext { + trace_id: 13088165645273925489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![], + } + ), + valid_datadog_no_priority: ( + None, + VALID_DATADOG_HEADERS_NO_PRIORITY.clone(), + SpanContext { + trace_id: 13088165645273925489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![], + }, + ), + invalid_datadog: ( + Some(vec![TracePropagationStyle::Datadog]), + INVALID_DATADOG_HEADERS.clone(), + SpanContext::default(), + ), + valid_datadog_explicit_style: ( + Some(vec![TracePropagationStyle::Datadog]), + VALID_DATADOG_HEADERS.clone(), + SpanContext { + trace_id: 13088165645273925489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![], + }, + ), + invalid_datadog_negative_trace_id: ( + Some(vec![TracePropagationStyle::Datadog]), + HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "-1".to_string(), + ), + ("x-datadog-parent-id".to_string(), "5678".to_string(),), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ]), + SpanContext::default(), + ), + valid_datadog_no_datadog_style: ( + Some(vec![TracePropagationStyle::TraceContext]), + VALID_DATADOG_HEADERS.clone(), + SpanContext::default(), + ), + // TraceContext Headers + valid_tracecontext_simple: ( + Some(vec![TracePropagationStyle::TraceContext]), + VALID_TRACECONTEXT_HEADERS_BASIC.clone(), + SpanContext { + trace_id: 
7277407061855694839, + span_id: 67667974448284343, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: Some("rum".to_string()), + tags: HashMap::from([ + ("tracestate".to_string(), "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string()), + ("_dd.p.tid".to_string(), "9291375655657946024".to_string()), + ("traceparent".to_string(), "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string()), + ("_dd.parent_id".to_string(), "00f067aa0ba902b7".to_string()), + ]), + links: vec![], + } + ), + valid_tracecontext_rum_no_sampling_decision: ( + Some(vec![TracePropagationStyle::TraceContext]), + VALID_TRACECONTEXT_HEADERS_RUM_NO_SAMPLING_DECISION.clone(), + SpanContext { + trace_id: 7277407061855694839, + span_id: 67667974448284343, + sampling: Some(Sampling { + priority: Some(0), + mechanism: None, + }), + origin: Some("rum".to_string()), + tags: HashMap::from([ + ("_dd.p.tid".to_string(), "9291375655657946024".to_string()), + ("tracestate".to_string(), "dd=o:rum".to_string()), + ("traceparent".to_string(), "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-00".to_string()), + ]), + links: vec![], + } + ), + // B3 Headers + // todo: all of them + // B3 single Headers + // todo: all of them + // All Headers + valid_all_headers: ( + None, + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13088165645273925489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![ + SpanLink { + trace_id: 7277407061855694839, + trace_id_high: 0, + span_id: 67667974448284343, + flags: 1, + tracestate: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "tracecontext".to_string()), + ]), + } + ], + }, + ), + valid_all_headers_all_styles: ( + 
Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13088165645273925489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![ + SpanLink { + trace_id: 7277407061855694839, + trace_id_high: 0, + span_id: 67667974448284343, + flags: 1, + tracestate: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "tracecontext".to_string()), + ]), + } + // todo: b3 span links + ], + }, + ), + valid_all_headers_datadog_style: ( + Some(vec![TracePropagationStyle::Datadog]), + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13088165645273925489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![] + }, + ), + // todo: valid_all_headers_b3_style + // todo: valid_all_headers_both_b3_styles + // todo: valid_all_headers_b3_single_style + none_style: ( + Some(vec![TracePropagationStyle::None]), + ALL_VALID_HEADERS.clone(), + SpanContext::default(), + ), + valid_style_and_none_still_extracts: ( + Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::None]), + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13088165645273925489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![], + } + ), + // Order matters + // todo: order_matters_b3_single_header_first + // todo: order_matters_b3_first + // todo: 
order_matters_b3_second_no_datadog_headers + // Tracestate is still added when TraceContext style comes later and matches + // first style's `trace_id` + additional_tracestate_support_when_present_and_matches_first_style_trace_id: ( + Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), + DATADOG_TRACECONTEXT_MATCHING_TRACE_ID_HEADERS.clone(), + SpanContext { + trace_id: 7277407061855694839, + span_id: 67667974448284343, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()), + ("_dd.parent_id".to_string(), "000000000000162e".to_string()), + (TRACESTATE_KEY.to_string(), "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMzE".to_string()) + ]), + links: vec![], + } + ), + // Tracestate is not added when TraceContext style comes later and does not + // match first style's `trace_id` + no_additional_tracestate_support_when_present_and_trace_id_does_not_match: ( + Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), + NO_TRACESTATE_SUPPORT_NOT_MATCHING_TRACE_ID.clone(), + SpanContext { + trace_id: 13088165645273925489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![ + SpanLink { + trace_id: 7277407061855694839, + trace_id_high: 0, + span_id: 67667974448284343, + flags: 0, + tracestate: "dd=o:rum".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "tracecontext".to_string()), + ]), + } + ], + } + ), + valid_all_headers_no_style: ( + Some(vec![]), + ALL_VALID_HEADERS.clone(), + SpanContext::default(), + ), + datadog_tracecontext_conflicting_span_ids: ( + Some(vec![TracePropagationStyle::Datadog, 
TracePropagationStyle::TraceContext]), + HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "9291375655657946024".to_string(), + ), + ("x-datadog-parent-id".to_string(), "15".to_string(),), + ("traceparent".to_string(), "00-000000000000000080f198ee56343ba8-000000000000000a-01".to_string()), + ]), + SpanContext { + trace_id: 9291375655657946024, + span_id: 10, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: None, + tags: HashMap::from([ + ("_dd.parent_id".to_string(), "000000000000000f".to_string()), + ("_dd.p.dm".to_string(), "-3".to_string()), + ]), + links: vec![], + } + ), + // todo: all_headers_all_styles_tracecontext_t_id_match_no_span_link + all_headers_all_styles_do_not_create_span_link_for_context_w_out_span_id: ( + Some(vec![TracePropagationStyle::TraceContext, TracePropagationStyle::Datadog]), + ALL_HEADERS_CHAOTIC_2.clone(), + SpanContext { + trace_id: 7277407061855694839, + span_id: 67667974448284343, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: Some("rum".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-4".to_string()), + ("_dd.p.tid".to_string(), "0".to_string()), + ("_dd.p.usr.id".to_string(), "baz64".to_string()), + ("traceparent".to_string(), "00-000000000000000064fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string()), + ("tracestate".to_string(), "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMzE".to_string()), + ]), + links: vec![ + SpanLink { + trace_id: 13088165645273925489, + trace_id_high: 0, + span_id: 5678, + flags: 1, + tracestate: "".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "datadog".to_string()), + ]), + } + ], + } + ), + all_headers_all_styles_tracecontext_primary_only_datadog_t_id_diff: ( + Some(vec![TracePropagationStyle::TraceContext, TracePropagationStyle::Datadog]), + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 
7277407061855694839, + span_id: 67667974448284343, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: Some("rum".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-4".to_string()), + ("_dd.p.tid".to_string(), "9291375655657946024".to_string()), + ("_dd.p.usr.id".to_string(), "baz64".to_string()), + ("traceparent".to_string(), "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string()), + ("tracestate".to_string(), "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string()), + ]), + links: vec![ + SpanLink { + trace_id: 13088165645273925489, + trace_id_high: 0, + span_id: 5678, + flags: 1, + tracestate: "".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "datadog".to_string()), + ]), + } + ], + } + ), + // todo: fix this test + all_headers_all_styles_datadog_primary_only_datadog_t_id_diff: ( + Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13088165645273925489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![ + SpanLink { + trace_id: 7277407061855694839, + // this should be `9291375655657946024` not `0`, but we don't have this data + // with the current definition of `SpanContext` + trace_id_high: 0, + span_id: 67667974448284343, + flags: 1, + tracestate: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "tracecontext".to_string()), + ]), + } + ], + } + ), + // todo: datadog_primary_match_tracecontext_dif_from_b3_b3multi_invalid + } + + #[test] + fn test_new_filter_propagators() { + let mut config = 
config::Config::default(); + config.trace_propagation_style_extract = vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + TracePropagationStyle::B3, + TracePropagationStyle::B3Multi, + ]; + + let propagator = DatadogCompositePropagator::new(Arc::new(config)); + + assert_eq!(propagator.propagators.len(), 2); + } + + #[test] + fn test_new_no_propagators() { + let mut config = config::Config::default(); + config.trace_propagation_style_extract = vec![TracePropagationStyle::None]; + let propagator = DatadogCompositePropagator::new(Arc::new(config)); + + assert_eq!(propagator.propagators.len(), 0); + } + + #[test] + fn test_extract_available_contexts() { + let mut config = config::Config::default(); + config.trace_propagation_style_extract = vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ]; + + let propagator = DatadogCompositePropagator::new(Arc::new(config)); + + let carrier = HashMap::from([ + ( + "traceparent".to_string(), + "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string(), + ), + ( + "tracestate".to_string(), + "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string(), + ), + ( + "x-datadog-trace-id".to_string(), + "7277407061855694839".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "67667974448284343".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "2".to_string()), + ("x-datadog-origin".to_string(), "rum".to_string()), + ( + "x-datadog-tags".to_string(), + "_dd.p.test=value,_dd.p.tid=9291375655657946024,any=tag".to_string(), + ), + ]); + let (contexts, styles) = propagator.extract_available_contexts(&carrier); + + assert_eq!(contexts.len(), 2); + assert_eq!(styles.len(), 2); + } + + #[test] + fn test_extract_available_contexts_no_contexts() { + let mut config = config::Config::default(); + config.trace_propagation_style_extract = vec![TracePropagationStyle::Datadog]; + + let propagator = DatadogCompositePropagator::new(Arc::new(config)); + + let carrier = 
HashMap::from([ + ( + "traceparent".to_string(), + "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string(), + ), + ( + "tracestate".to_string(), + "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string(), + ), + ]); + let (contexts, styles) = propagator.extract_available_contexts(&carrier); + + assert_eq!(contexts.len(), 0); + assert_eq!(styles.len(), 0); + } + + #[test] + fn test_attach_baggage() { + let mut context = SpanContext::default(); + let carrier = HashMap::from([ + ("x-datadog-trace-id".to_string(), "123".to_string()), + ("x-datadog-parent-id".to_string(), "5678".to_string()), + ("ot-baggage-key1".to_string(), "value1".to_string()), + ]); + + DatadogCompositePropagator::attach_baggage(&mut context, &carrier); + + assert_eq!(context.tags.len(), 1); + assert_eq!(context.tags.get("key1").unwrap(), "value1"); + } +} diff --git a/bottlecap/src/traces/propagation/text_map_propagator.rs b/bottlecap/src/traces/propagation/text_map_propagator.rs index ce3f5abd1..42b4a17fe 100644 --- a/bottlecap/src/traces/propagation/text_map_propagator.rs +++ b/bottlecap/src/traces/propagation/text_map_propagator.rs @@ -1,17 +1,16 @@ use std::collections::HashMap; use lazy_static::lazy_static; -use log::warn; use regex::Regex; -use tracing::{debug, error}; +use tracing::{debug, error, warn}; -use super::{ +use crate::traces::context::{Sampling, SpanContext}; +use crate::traces::propagation::{ carrier::{Extractor, Injector}, error::Error, + Propagator, }; -use crate::traces::context::{Sampling, SpanContext}; - // Datadog Keys const DATADOG_TRACE_ID_KEY: &str = "x-datadog-trace-id"; const DATADOG_PARENT_ID_KEY: &str = "x-datadog-parent-id"; @@ -19,14 +18,16 @@ const DATADOG_SAMPLING_PRIORITY_KEY: &str = "x-datadog-sampling-priority"; const DATADOG_ORIGIN_KEY: &str = "x-datadog-origin"; const DATADOG_TAGS_KEY: &str = "x-datadog-tags"; -const DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY: &str = "_dd.p.tid"; +pub const DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY: &str = "_dd.p.tid"; const 
DATADOG_PROPAGATION_ERROR_KEY: &str = "_dd.propagation_error"; -const DATADOG_LAST_PARENT_ID_KEY: &str = "_dd.parent_id"; +pub const DATADOG_LAST_PARENT_ID_KEY: &str = "_dd.parent_id"; const DATADOG_SAMPLING_DECISION_KEY: &str = "_dd.p.dm"; // Traceparent Keys const TRACEPARENT_KEY: &str = "traceparent"; -const TRACESTATE_KEY: &str = "tracestate"; +pub const TRACESTATE_KEY: &str = "tracestate"; + +pub const BAGGAGE_PREFIX: &str = "ot-baggage-"; lazy_static! { static ref TRACEPARENT_REGEX: Regex = @@ -43,17 +44,12 @@ lazy_static! { Regex::new(r"^-([0-9])$").expect("failed creating regex"); } -pub trait TextMapPropagator { - fn extract(&self, carrier: &dyn Extractor) -> SpanContext; - fn inject(&self, context: SpanContext, carrier: &mut dyn Injector); -} - #[derive(Clone, Copy)] -pub struct DatadogPropagator; +pub struct DatadogHeaderPropagator; -impl TextMapPropagator for DatadogPropagator { - fn extract(&self, carrier: &dyn Extractor) -> SpanContext { - Self::extract_context(carrier).unwrap_or_default() +impl Propagator for DatadogHeaderPropagator { + fn extract(&self, carrier: &dyn Extractor) -> Option { + Self::extract_context(carrier) } fn inject(&self, _context: SpanContext, _carrier: &mut dyn Injector) { @@ -61,29 +57,39 @@ impl TextMapPropagator for DatadogPropagator { } } -impl DatadogPropagator { +impl DatadogHeaderPropagator { fn extract_context(carrier: &dyn Extractor) -> Option { - if let Some(trace_id) = Self::extract_trace_id(carrier) { - let parent_id = Self::extract_parent_id(carrier).unwrap_or(0); - let origin = Self::extract_origin(carrier); - let mut tags = Self::extract_tags(carrier); - let sampling_priority = Self::extract_sampling_priority(carrier).unwrap_or(2); - - Self::validate_sampling_decision(&mut tags); - - return Some(SpanContext { - trace_id, - span_id: parent_id, - sampling: Some(Sampling { - priority: Some(sampling_priority), - mechanism: None, - }), - origin, - tags, - }); - } + let trace_id = match Self::extract_trace_id(carrier) { 
+ Ok(trace_id) => trace_id, + Err(e) => { + debug!("{e}"); + return None; + } + }; - None + let parent_id = Self::extract_parent_id(carrier).unwrap_or(0); + let sampling_priority = match Self::extract_sampling_priority(carrier) { + Ok(sampling_priority) => sampling_priority, + Err(e) => { + debug!("{e}"); + return None; + } + }; + let origin = Self::extract_origin(carrier); + let mut tags = Self::extract_tags(carrier); + Self::validate_sampling_decision(&mut tags); + + Some(SpanContext { + trace_id, + span_id: parent_id, + sampling: Some(Sampling { + priority: Some(sampling_priority), + mechanism: None, + }), + origin, + tags, + links: Vec::new(), + }) } fn validate_sampling_decision(tags: &mut HashMap) { @@ -108,14 +114,18 @@ impl DatadogPropagator { // todo: appsec standalone } - fn extract_trace_id(carrier: &dyn Extractor) -> Option { - let trace_id = carrier.get(DATADOG_TRACE_ID_KEY)?; + fn extract_trace_id(carrier: &dyn Extractor) -> Result { + let trace_id = carrier + .get(DATADOG_TRACE_ID_KEY) + .ok_or(Error::extract("`trace_id` not found", "datadog"))?; if INVALID_SEGMENT_REGEX.is_match(trace_id) { - return None; + return Err(Error::extract("Invalid `trace_id` found", "datadog")); } - trace_id.parse::().ok() + trace_id + .parse::() + .map_err(|_| Error::extract("Failed to decode `trace_id`", "datadog")) } fn extract_parent_id(carrier: &dyn Extractor) -> Option { @@ -124,10 +134,13 @@ impl DatadogPropagator { parent_id.parse::().ok() } - fn extract_sampling_priority(carrier: &dyn Extractor) -> Option { - let sampling_priority = carrier.get(DATADOG_SAMPLING_PRIORITY_KEY)?; + fn extract_sampling_priority(carrier: &dyn Extractor) -> Result { + // todo: enum? 
Default is USER_KEEP=2 + let sampling_priority = carrier.get(DATADOG_SAMPLING_PRIORITY_KEY).unwrap_or("2"); - sampling_priority.parse::().ok() + sampling_priority + .parse::() + .map_err(|_| Error::extract("Failed to decode `sampling_priority`", "datadog")) } fn extract_origin(carrier: &dyn Extractor) -> Option { @@ -147,7 +160,7 @@ impl DatadogPropagator { for pair in pairs { if let Some((k, v)) = pair.split_once('=') { // todo: reject key on tags extract reject - if k.starts_with("_dd.p") { + if k.starts_with("_dd.p.") { tags.insert(k.to_string(), v.to_string()); } } @@ -203,11 +216,11 @@ struct Tracestate { } #[derive(Clone, Copy)] -pub struct TraceparentPropagator; +pub struct TraceContextPropagator; -impl TextMapPropagator for TraceparentPropagator { - fn extract(&self, carrier: &dyn Extractor) -> SpanContext { - Self::extract_context(carrier).unwrap_or_default() +impl Propagator for TraceContextPropagator { + fn extract(&self, carrier: &dyn Extractor) -> Option { + Self::extract_context(carrier) } fn inject(&self, _context: SpanContext, _carrier: &mut dyn Injector) { @@ -215,7 +228,7 @@ impl TextMapPropagator for TraceparentPropagator { } } -impl TraceparentPropagator { +impl TraceContextPropagator { fn extract_context(carrier: &dyn Extractor) -> Option { let tp = carrier.get(TRACEPARENT_KEY)?.trim(); @@ -259,6 +272,7 @@ impl TraceparentPropagator { }), origin, tags, + links: Vec::new(), }) } Err(e) => { @@ -318,9 +332,10 @@ impl TraceparentPropagator { tracestate.lower_order_trace_id = Some(lo_tid.to_string()); } + // Convert from `t.` to `_dd.p.` for (k, v) in &dd { - if k.starts_with("t.") { - let nk = format!("_dd.p.{k}"); + if let Some(stripped) = k.strip_prefix("t.") { + let nk = format!("_dd.p.{stripped}"); tags.insert(nk, Self::decode_tag_value(v)); } } @@ -445,19 +460,22 @@ mod test { #[test] fn test_extract_datadog_propagator() { - let mut headers = HashMap::new(); - headers.insert("x-datadog-trace-id".to_string(), "1234".to_string()); - 
headers.insert("x-datadog-parent-id".to_string(), "5678".to_string()); - headers.insert("x-datadog-sampling-priority".to_string(), "1".to_string()); - headers.insert("x-datadog-origin".to_string(), "synthetics".to_string()); - headers.insert( - "x-datadog-tags".to_string(), - "_dd.p.test=value,_dd.p.tid=4321,any=tag".to_string(), - ); - - let propagator = DatadogPropagator; - - let context = propagator.extract(&headers); + let headers = HashMap::from([ + ("x-datadog-trace-id".to_string(), "1234".to_string()), + ("x-datadog-parent-id".to_string(), "5678".to_string()), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ( + "x-datadog-tags".to_string(), + "_dd.p.test=value,_dd.p.tid=4321,any=tag".to_string(), + ), + ]); + + let propagator = DatadogHeaderPropagator; + + let context = propagator + .extract(&headers) + .expect("couldn't extract trace context"); assert_eq!(context.trace_id, 1234); assert_eq!(context.span_id, 5678); @@ -471,18 +489,21 @@ mod test { #[test] fn test_extract_traceparent_propagator() { - let mut headers = HashMap::new(); - headers.insert( - "traceparent".to_string(), - "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string(), - ); - headers.insert( - "tracestate".to_string(), - "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string(), - ); - - let propagator = TraceparentPropagator; - let context = propagator.extract(&headers); + let headers = HashMap::from([ + ( + "traceparent".to_string(), + "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string(), + ), + ( + "tracestate".to_string(), + "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string(), + ), + ]); + + let propagator = TraceContextPropagator; + let context = propagator + .extract(&headers) + .expect("couldn't extract trace context"); assert_eq!(context.trace_id, 7277407061855694839); assert_eq!(context.span_id, 67667974448284343); From 905d851b5b67068b1d69fd2c27a7282650bfc076 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Wed, 23 Oct 2024 13:43:07 -0400 Subject: [PATCH 10/41] feat(bottlecap): add Distributed Tracing (#423) * headers `HeaderMap` to `HashMap` * add `Send` to propagators traits * add `serde_json::Value` extractor + injector * add `get_carrier` to `Trigger` trait * add `get_carrier` method to current inferred spans * update `span_inferrer.rs` to use `get_carrier` methods for distributed tracing * add `headers_to_map` function * reparent spans I suspect there might be something wrong here, the code in Go is quite convoluted * make some variables public * fix to return early on `extract_span_context` * fix how 128 bit is handled also updated some variable names * update comment --- .../src/lifecycle/invocation/processor.rs | 66 ++++++++++- .../src/lifecycle/invocation/span_inferrer.rs | 111 ++++++++++-------- .../triggers/api_gateway_http_event.rs | 4 + .../triggers/api_gateway_rest_event.rs | 4 + .../src/lifecycle/invocation/triggers/mod.rs | 1 + bottlecap/src/lifecycle/listener.rs | 48 +++++++- bottlecap/src/traces/propagation/carrier.rs | 55 +++++++++ bottlecap/src/traces/propagation/mod.rs | 8 +- .../traces/propagation/text_map_propagator.rs | 6 +- 9 files changed, 238 insertions(+), 65 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 9b9cc98ef..d7a7cad66 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -7,6 +7,7 @@ use std::{ use chrono::{DateTime, Utc}; use datadog_trace_protobuf::pb::Span; use datadog_trace_utils::{send_data::SendData, tracer_header_tags}; +use serde_json::{json, Value}; use tokio::sync::mpsc::Sender; use tracing::debug; @@ -14,7 +15,11 @@ use crate::{ config::{self, AwsConfig}, lifecycle::invocation::{context::ContextBuffer, span_inferrer::SpanInferrer}, tags::provider, - traces::trace_processor, + traces::{ + 
context::SpanContext, + propagation::{DatadogCompositePropagator, Propagator}, + trace_processor, + }, }; pub const MS_TO_NS: f64 = 1_000_000.0; @@ -23,6 +28,9 @@ pub struct Processor { pub context_buffer: ContextBuffer, inferrer: SpanInferrer, pub span: Span, + pub extracted_span_context: Option, + // Used to extract the trace context from inferred span, headers, or payload + propagator: DatadogCompositePropagator, aws_config: AwsConfig, tracer_detected: bool, } @@ -39,6 +47,8 @@ impl Processor { .get_canonical_resource_name() .unwrap_or("aws_lambda".to_string()); + let propagator = DatadogCompositePropagator::new(Arc::clone(&config)); + Processor { context_buffer: ContextBuffer::default(), inferrer: SpanInferrer::default(), @@ -58,6 +68,8 @@ impl Processor { meta_struct: HashMap::new(), span_links: Vec::new(), }, + extracted_span_context: None, + propagator, aws_config: aws_config.clone(), tracer_detected: false, } @@ -166,7 +178,7 @@ impl Processor { /// If this method is called, it means that we are operating in a Universally Instrumented /// runtime. Therefore, we need to set the `tracer_detected` flag to `true`. /// - pub fn on_invocation_start(&mut self, payload: Vec) { + pub fn on_invocation_start(&mut self, headers: HashMap, payload: Vec) { self.tracer_detected = true; // Reset trace context @@ -174,13 +186,60 @@ impl Processor { self.span.parent_id = 0; self.span.span_id = 0; - self.inferrer.infer_span(&payload, &self.aws_config); + let payload_value = match serde_json::from_slice::(&payload) { + Ok(value) => value, + Err(_) => json!({}), + }; + + self.extracted_span_context = self.extract_span_context(&headers, &payload_value); + self.inferrer.infer_span(&payload_value, &self.aws_config); + + if let Some(sc) = &self.extracted_span_context { + self.span.trace_id = sc.trace_id; + self.span.parent_id = sc.span_id; + + // Set the right data to the correct root level span, + // If there's an inferred span, then that should be the root. 
+ if self.inferrer.get_inferred_span().is_some() { + self.inferrer.set_parent_id(sc.span_id); + self.inferrer.extend_meta(sc.tags.clone()); + } else { + self.span.meta.extend(sc.tags.clone()); + } + } if let Some(inferred_span) = self.inferrer.get_inferred_span() { self.span.parent_id = inferred_span.span_id; } } + fn extract_span_context( + &mut self, + headers: &HashMap, + payload_value: &Value, + ) -> Option { + if let Some(carrier) = self.inferrer.get_carrier() { + if let Some(sc) = self.propagator.extract(&carrier) { + debug!("Extracted trace context from inferred span"); + return Some(sc); + } + } + + if let Some(payload_headers) = payload_value.get("headers") { + if let Some(sc) = self.propagator.extract(payload_headers) { + debug!("Extracted trace context from event headers"); + return Some(sc); + } + } + + if let Some(sc) = self.propagator.extract(headers) { + debug!("Extracted trace context from headers"); + return Some(sc); + } + + None + } + /// Given trace context information, set it to the current span. 
/// pub fn on_invocation_end( @@ -194,7 +253,6 @@ impl Processor { self.span.span_id = span_id; if self.inferrer.get_inferred_span().is_some() { - self.inferrer.set_parent_id(parent_id); if let Some(status_code) = status_code { self.inferrer.set_status_code(status_code); } diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 7b2a0eefc..6141d8dd2 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use datadog_trace_protobuf::pb::Span; use rand::Rng; use serde_json::Value; @@ -16,6 +18,7 @@ const FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG: &str = "function_trigger.event_sour pub struct SpanInferrer { inferred_span: Option, is_async_span: bool, + carrier: Option>, } impl Default for SpanInferrer { @@ -30,6 +33,7 @@ impl SpanInferrer { Self { inferred_span: None, is_async_span: false, + carrier: None, } } @@ -37,58 +41,56 @@ impl SpanInferrer { /// and try matching it to a `Trigger` implementation, which will create /// an inferred span and set it to `self.inferred_span` /// - pub fn infer_span(&mut self, payload: &[u8], aws_config: &AwsConfig) { + pub fn infer_span(&mut self, payload_value: &Value, aws_config: &AwsConfig) { self.inferred_span = None; - if let Ok(payload_value) = serde_json::from_slice::(payload) { - if APIGatewayHttpEvent::is_match(&payload_value) { - if let Some(t) = APIGatewayHttpEvent::new(payload_value) { - let mut span = Span { - span_id: Self::generate_span_id(), - ..Default::default() - }; - - t.enrich_span(&mut span); - span.meta.extend([ - ( - FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), - "api_gateway".to_string(), - ), - ( - FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), - t.get_arn(&aws_config.region), - ), - ]); - - self.is_async_span = t.is_async(); - self.inferred_span = Some(span); - } - } else if APIGatewayRestEvent::is_match(&payload_value) { 
- if let Some(t) = APIGatewayRestEvent::new(payload_value) { - let mut span = Span { - span_id: Self::generate_span_id(), - ..Default::default() - }; - - t.enrich_span(&mut span); - span.meta.extend([ - ( - FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), - "api_gateway".to_string(), - ), - ( - FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), - t.get_arn(&aws_config.region), - ), - ]); - - self.is_async_span = t.is_async(); - self.inferred_span = Some(span); - } - } else { - debug!("Unable to infer span from payload"); + if APIGatewayHttpEvent::is_match(payload_value) { + if let Some(t) = APIGatewayHttpEvent::new(payload_value.clone()) { + let mut span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + + t.enrich_span(&mut span); + span.meta.extend([ + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "api_gateway".to_string(), + ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + t.get_arn(&aws_config.region), + ), + ]); + + self.carrier = Some(t.get_carrier()); + self.is_async_span = t.is_async(); + self.inferred_span = Some(span); + } + } else if APIGatewayRestEvent::is_match(payload_value) { + if let Some(t) = APIGatewayRestEvent::new(payload_value.clone()) { + let mut span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + + t.enrich_span(&mut span); + span.meta.extend([ + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "api_gateway".to_string(), + ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + t.get_arn(&aws_config.region), + ), + ]); + + self.carrier = Some(t.get_carrier()); + self.is_async_span = t.is_async(); + self.inferred_span = Some(span); } } else { - debug!("Unable to serialize payload"); + debug!("Unable to infer span from payload"); } } @@ -101,6 +103,12 @@ impl SpanInferrer { } } + pub fn extend_meta(&mut self, iter: HashMap) { + if let Some(s) = &mut self.inferred_span { + s.meta.extend(iter); + } + } + pub fn set_status_code(&mut self, status_code: String) { if 
let Some(s) = &mut self.inferred_span { s.meta.insert("http.status_code".to_string(), status_code); @@ -136,4 +144,9 @@ impl SpanInferrer { pub fn get_inferred_span(&self) -> &Option { &self.inferred_span } + + #[must_use] + pub fn get_carrier(&self) -> Option> { + self.carrier.clone() + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs index effc3e3c8..932541a00 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs @@ -182,6 +182,10 @@ impl Trigger for APIGatewayHttpEvent { .get("x-amz-invocation-type") .is_some_and(|v| v == "Event") } + + fn get_carrier(&self) -> HashMap { + self.headers.clone() + } } #[cfg(test)] diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs index 7a737d576..2ae79c40a 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs @@ -171,6 +171,10 @@ impl Trigger for APIGatewayRestEvent { .get("x-amz-invocation-type") .is_some_and(|v| v == "Event") } + + fn get_carrier(&self) -> HashMap { + self.headers.clone() + } } #[cfg(test)] diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index 5eb32ec6b..f04db8a81 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -13,6 +13,7 @@ pub trait Trigger: Sized { fn enrich_span(&self, span: &mut Span); fn get_tags(&self) -> HashMap; fn get_arn(&self, region: &str) -> String; + fn get_carrier(&self) -> HashMap; fn is_async(&self) -> bool; } diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs index 4b11717f1..a4d39310b 100644 --- 
a/bottlecap/src/lifecycle/listener.rs +++ b/bottlecap/src/lifecycle/listener.rs @@ -1,6 +1,7 @@ // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use std::collections::HashMap; use std::convert::Infallible; use std::net::SocketAddr; use std::sync::Arc; @@ -12,6 +13,10 @@ use tokio::sync::Mutex; use tracing::{debug, error, warn}; use crate::lifecycle::invocation::processor::Processor as InvocationProcessor; +use crate::traces::propagation::text_map_propagator::{ + DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY, DATADOG_SAMPLING_PRIORITY_KEY, DATADOG_TAGS_KEY, + DATADOG_TRACE_ID_KEY, +}; const HELLO_PATH: &str = "/lambda/hello"; const START_INVOCATION_PATH: &str = "/lambda/start-invocation"; @@ -83,18 +88,37 @@ impl Listener { invocation_processor: Arc>, ) -> http::Result> { debug!("Received start invocation request"); - let (_, body) = req.into_parts(); + let (parts, body) = req.into_parts(); match hyper::body::to_bytes(body).await { Ok(b) => { let body = b.to_vec(); let mut processor = invocation_processor.lock().await; - processor.on_invocation_start(body); + let headers = Self::headers_to_map(parts.headers); + + processor.on_invocation_start(headers, body); let mut response = Response::builder().status(200); - if processor.span.trace_id != 0 { - response = - response.header("x-datadog-trace-id", processor.span.trace_id.to_string()); + + // If a `SpanContext` exists, then tell the tracer to use it. 
+ // todo: update this whole code with DatadogHeaderPropagator::inject + // since this logic looks messy + if let Some(sp) = &processor.extracted_span_context { + response = response.header(DATADOG_TRACE_ID_KEY, sp.trace_id.to_string()); + if let Some(priority) = sp.sampling.and_then(|s| s.priority) { + response = + response.header(DATADOG_SAMPLING_PRIORITY_KEY, priority.to_string()); + } + + // Handle 128 bit trace ids + if let Some(trace_id_higher_order_bits) = + sp.tags.get(DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY) + { + response = response.header( + DATADOG_TAGS_KEY, + format!("{DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY}={trace_id_higher_order_bits}"), + ); + } } drop(processor); @@ -128,6 +152,8 @@ impl Listener { let mut processor = invocation_processor.lock().await; + // todo: fix this, code is a copy of the existing logic in Go, not accounting + // when a 128 bit trace id exist let mut trace_id = 0; if let Some(header) = headers.get("x-datadog-trace-id") { if let Ok(header_value) = header.to_str() { @@ -163,4 +189,16 @@ impl Listener { .status(200) .body(Body::from(json!({}).to_string())) } + + fn headers_to_map(headers: http::HeaderMap) -> HashMap { + headers + .iter() + .map(|(k, v)| { + ( + k.as_str().to_string(), + v.to_str().unwrap_or_default().to_string(), + ) + }) + .collect() + } } diff --git a/bottlecap/src/traces/propagation/carrier.rs b/bottlecap/src/traces/propagation/carrier.rs index fc5ef9dbb..d0f2182fa 100644 --- a/bottlecap/src/traces/propagation/carrier.rs +++ b/bottlecap/src/traces/propagation/carrier.rs @@ -3,6 +3,8 @@ /// use std::collections::HashMap; +use serde_json::Value; + /// Injector provides an interface for a carrier to be used /// with a Propagator to inject a Context into the carrier. /// @@ -38,6 +40,35 @@ impl Extractor for HashMap { } } +impl Injector for Value { + /// Set a key and value in the `Value`. 
+ fn set(&mut self, key: &str, value: String) { + if let Value::Object(map) = self { + map.insert(key.to_lowercase(), Value::String(value)); + } + } +} + +impl Extractor for Value { + /// Get a value for a key from the `Value`. + fn get(&self, key: &str) -> Option<&str> { + if let Value::Object(map) = self { + map.get(&key.to_lowercase()).and_then(|v| v.as_str()) + } else { + None + } + } + + /// Collect all the keys from the `Value`. + fn keys(&self) -> Vec<&str> { + if let Value::Object(map) = self { + map.keys().map(String::as_str).collect::>() + } else { + Vec::new() + } + } +} + #[cfg(test)] mod test { use super::*; @@ -65,4 +96,28 @@ mod test { assert!(got.contains(&"headername1")); assert!(got.contains(&"headername2")); } + + #[test] + fn serde_value_get() { + let mut carrier = Value::Object(serde_json::Map::new()); + carrier.set("headerName", "value".to_string()); + + assert_eq!( + Extractor::get(&carrier, "HEADERNAME"), + Some("value"), + "case insensitive extraction" + ); + } + + #[test] + fn serde_value_keys() { + let mut carrier = Value::Object(serde_json::Map::new()); + carrier.set("headerName1", "value1".to_string()); + carrier.set("headerName2", "value2".to_string()); + + let got = Extractor::keys(&carrier); + assert_eq!(got.len(), 2); + assert!(got.contains(&"headername1")); + assert!(got.contains(&"headername2")); + } } diff --git a/bottlecap/src/traces/propagation/mod.rs b/bottlecap/src/traces/propagation/mod.rs index e93d81329..e25e9d35d 100644 --- a/bottlecap/src/traces/propagation/mod.rs +++ b/bottlecap/src/traces/propagation/mod.rs @@ -21,7 +21,7 @@ pub trait Propagator { } pub struct DatadogCompositePropagator { - propagators: Vec>, + propagators: Vec>, config: Arc, } @@ -64,17 +64,17 @@ impl Propagator for DatadogCompositePropagator { impl DatadogCompositePropagator { #[must_use] pub fn new(config: Arc) -> Self { - let propagators: Vec> = config + let propagators: Vec> = config .trace_propagation_style_extract .iter() .filter_map(|style| 
match style { TracePropagationStyle::Datadog => { Some(Box::new(text_map_propagator::DatadogHeaderPropagator) - as Box) + as Box) } TracePropagationStyle::TraceContext => { Some(Box::new(text_map_propagator::TraceContextPropagator) - as Box) + as Box) } _ => None, }) diff --git a/bottlecap/src/traces/propagation/text_map_propagator.rs b/bottlecap/src/traces/propagation/text_map_propagator.rs index 42b4a17fe..1a0803aac 100644 --- a/bottlecap/src/traces/propagation/text_map_propagator.rs +++ b/bottlecap/src/traces/propagation/text_map_propagator.rs @@ -12,11 +12,11 @@ use crate::traces::propagation::{ }; // Datadog Keys -const DATADOG_TRACE_ID_KEY: &str = "x-datadog-trace-id"; +pub const DATADOG_TRACE_ID_KEY: &str = "x-datadog-trace-id"; const DATADOG_PARENT_ID_KEY: &str = "x-datadog-parent-id"; -const DATADOG_SAMPLING_PRIORITY_KEY: &str = "x-datadog-sampling-priority"; +pub const DATADOG_SAMPLING_PRIORITY_KEY: &str = "x-datadog-sampling-priority"; const DATADOG_ORIGIN_KEY: &str = "x-datadog-origin"; -const DATADOG_TAGS_KEY: &str = "x-datadog-tags"; +pub const DATADOG_TAGS_KEY: &str = "x-datadog-tags"; pub const DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY: &str = "_dd.p.tid"; const DATADOG_PROPAGATION_ERROR_KEY: &str = "_dd.propagation_error"; From 49ea0cbe96e1c17d732117bc31576685e8155195 Mon Sep 17 00:00:00 2001 From: shreyamalpani Date: Wed, 23 Oct 2024 16:33:40 -0400 Subject: [PATCH 11/41] [SVLS-5714] Add lambda network enhanced metrics (#424) * send network enhanced metrics * naming fixes * reformatting reading data from proc --- bottlecap/src/bin/bottlecap/main.rs | 6 +- bottlecap/src/lib.rs | 1 + bottlecap/src/lifecycle/invocation/context.rs | 81 +++++++++++++--- .../src/lifecycle/invocation/processor.rs | 19 +++- bottlecap/src/logs/lambda/processor.rs | 2 +- bottlecap/src/metrics/enhanced/constants.rs | 3 + bottlecap/src/metrics/enhanced/lambda.rs | 86 +++++++++++++++++ bottlecap/src/proc/constants.rs | 3 + bottlecap/src/proc/mod.rs | 92 +++++++++++++++++++ 
.../tests/proc/net/invalid_dev_malformed | 5 + .../proc/net/invalid_dev_non_numerical_value | 5 + .../tests/proc/net/missing_interface_dev | 4 + bottlecap/tests/proc/net/valid_dev | 5 + 13 files changed, 296 insertions(+), 16 deletions(-) create mode 100644 bottlecap/src/proc/constants.rs create mode 100644 bottlecap/src/proc/mod.rs create mode 100644 bottlecap/tests/proc/net/invalid_dev_malformed create mode 100644 bottlecap/tests/proc/net/invalid_dev_non_numerical_value create mode 100644 bottlecap/tests/proc/net/missing_interface_dev create mode 100644 bottlecap/tests/proc/net/valid_dev diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 0d7108677..e064cde28 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -376,6 +376,9 @@ async fn extension_loop_active( request_id, deadline_ms, invoked_function_arn ); lambda_enhanced_metrics.increment_invocation_metric(); + let mut p = invocation_processor.lock().await; + p.on_invoke_event(request_id); + drop(p); } Ok(NextEventResponse::Shutdown { shutdown_reason, @@ -474,10 +477,11 @@ async fn extension_loop_active( ); lambda_enhanced_metrics.set_report_log_metrics(&metrics); let mut p = invocation_processor.lock().await; - if let Some(post_runtime_duration_ms) = p.on_platform_report(&request_id, metrics.duration_ms) { + if let Some((post_runtime_duration_ms, network_offset)) = p.on_platform_report(&request_id, metrics.duration_ms) { lambda_enhanced_metrics.set_post_runtime_duration_metric( post_runtime_duration_ms, ); + lambda_enhanced_metrics.set_network_enhanced_metrics(network_offset); } drop(p); diff --git a/bottlecap/src/lib.rs b/bottlecap/src/lib.rs index ce2cb847b..59b445215 100644 --- a/bottlecap/src/lib.rs +++ b/bottlecap/src/lib.rs @@ -25,6 +25,7 @@ pub mod lifecycle; pub mod logger; pub mod logs; pub mod metrics; +pub mod proc; pub mod secrets; pub mod tags; pub mod telemetry; diff --git 
a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs index a1c74bc57..9c00dea30 100644 --- a/bottlecap/src/lifecycle/invocation/context.rs +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -1,3 +1,4 @@ +use crate::proc::NetworkData; use std::collections::VecDeque; use tracing::debug; @@ -8,6 +9,7 @@ pub struct Context { pub runtime_duration_ms: f64, pub init_duration_ms: f64, pub start_time: i64, + pub network_offset: Option, } impl Context { @@ -17,12 +19,14 @@ impl Context { runtime_duration_ms: f64, init_duration_ms: f64, start_time: i64, + network_offset: Option, ) -> Self { Context { request_id, runtime_duration_ms, init_duration_ms, start_time, + network_offset, } } } @@ -100,7 +104,13 @@ impl ContextBuffer { { context.init_duration_ms = init_duration_ms; } else { - self.insert(Context::new(request_id.clone(), 0.0, init_duration_ms, 0)); + self.insert(Context::new( + request_id.clone(), + 0.0, + init_duration_ms, + 0, + None, + )); } } @@ -115,7 +125,7 @@ impl ContextBuffer { { context.start_time = start_time; } else { - self.insert(Context::new(request_id.clone(), 0.0, 0.0, start_time)); + self.insert(Context::new(request_id.clone(), 0.0, 0.0, start_time, None)); } } @@ -135,10 +145,26 @@ impl ContextBuffer { runtime_duration_ms, 0.0, 0, + None, )); } } + /// Adds the network offset to a `Context` in the buffer. If the `Context` is not found, a new + /// `Context` is created and added to the buffer. + /// + pub fn add_network_offset(&mut self, request_id: &String, network_data: Option) { + if let Some(context) = self + .buffer + .iter_mut() + .find(|context| context.request_id == *request_id) + { + context.network_offset = network_data; + } else { + self.insert(Context::new(request_id.clone(), 0.0, 0.0, 0, network_data)); + } + } + /// Returns the size of the buffer. 
/// #[must_use] @@ -157,20 +183,20 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); let request_id_2 = String::from("2"); - let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 2); assert_eq!(buffer.get(&request_id_2).unwrap(), &context); // This should replace the first context let request_id_3 = String::from("3"); - let context = Context::new(request_id_3.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id_3.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 2); assert_eq!(buffer.get(&request_id_3).unwrap(), &context); @@ -184,13 +210,13 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); let request_id_2 = String::from("2"); - let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 2); assert_eq!(buffer.get(&request_id_2).unwrap(), &context); @@ -211,13 +237,13 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); 
assert_eq!(buffer.get(&request_id).unwrap(), &context); let request_id_2 = String::from("2"); - let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 2); assert_eq!(buffer.get(&request_id_2).unwrap(), &context); @@ -232,7 +258,7 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); @@ -255,7 +281,7 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); @@ -275,7 +301,7 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); @@ -295,4 +321,35 @@ mod tests { 200.0 ); } + + #[test] + fn test_add_network_offset() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + let network_offset = Some(NetworkData { + rx_bytes: 180.0, + tx_bytes: 254.0, + }); + + buffer.add_network_offset(&request_id, network_offset); + assert_eq!( + 
buffer.get(&request_id).unwrap().network_offset, + network_offset, + ); + + // Add network offset to a context that doesn't exist + let unexistent_request_id = String::from("unexistent"); + buffer.add_network_offset(&unexistent_request_id, network_offset); + assert_eq!(buffer.size(), 2); + assert_eq!( + buffer.get(&unexistent_request_id).unwrap().network_offset, + network_offset + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index d7a7cad66..112791e9a 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -14,6 +14,7 @@ use tracing::debug; use crate::{ config::{self, AwsConfig}, lifecycle::invocation::{context::ContextBuffer, span_inferrer::SpanInferrer}, + proc::{self, NetworkData}, tags::provider, traces::{ context::SpanContext, @@ -75,6 +76,14 @@ impl Processor { } } + /// Given a `request_id`, add the enhanced metric offsets to the context buffer. + /// + pub fn on_invoke_event(&mut self, request_id: String) { + let network_offset: Option = proc::get_network_data().ok(); + self.context_buffer + .add_network_offset(&request_id, network_offset); + } + /// Given a `request_id` and the time of the platform start, add the start time to the context buffer. /// /// Also, set the start time of the current span. @@ -163,13 +172,19 @@ impl Processor { /// If the `request_id` is not found in the context buffer, return `None`. /// If the `runtime_duration_ms` hasn't been seen, return `None`. 
/// - pub fn on_platform_report(&mut self, request_id: &String, duration_ms: f64) -> Option { + pub fn on_platform_report( + &mut self, + request_id: &String, + duration_ms: f64, + ) -> Option<(f64, Option)> { if let Some(context) = self.context_buffer.remove(request_id) { if context.runtime_duration_ms == 0.0 { return None; } - return Some(duration_ms - context.runtime_duration_ms); + let post_runtime_duration_ms = duration_ms - context.runtime_duration_ms; + + return Some((post_runtime_duration_ms, context.network_offset)); } None diff --git a/bottlecap/src/logs/lambda/processor.rs b/bottlecap/src/logs/lambda/processor.rs index d3b519682..6de124be6 100644 --- a/bottlecap/src/logs/lambda/processor.rs +++ b/bottlecap/src/logs/lambda/processor.rs @@ -53,7 +53,7 @@ impl LambdaProcessor { service, tags, rules, - invocation_context: InvocationContext::new(String::new(), 0.0, 0.0, 0), + invocation_context: InvocationContext::new(String::new(), 0.0, 0.0, 0, None), orphan_logs: Vec::new(), ready_logs: Vec::new(), event_bus, diff --git a/bottlecap/src/metrics/enhanced/constants.rs b/bottlecap/src/metrics/enhanced/constants.rs index 3c2d34e0a..e82f48057 100644 --- a/bottlecap/src/metrics/enhanced/constants.rs +++ b/bottlecap/src/metrics/enhanced/constants.rs @@ -21,5 +21,8 @@ pub const OUT_OF_MEMORY_METRIC: &str = "aws.lambda.enhanced.out_of_memory"; pub const TIMEOUTS_METRIC: &str = "aws.lambda.enhanced.timeouts"; pub const ERRORS_METRIC: &str = "aws.lambda.enhanced.errors"; pub const INVOCATIONS_METRIC: &str = "aws.lambda.enhanced.invocations"; +pub const RX_BYTES_METRIC: &str = "aws.lambda.enhanced.rx_bytes"; +pub const TX_BYTES_METRIC: &str = "aws.lambda.enhanced.tx_bytes"; +pub const TOTAL_NETWORK_METRIC: &str = "aws.lambda.enhanced.total_network"; //pub const ASM_INVOCATIONS_METRIC: &str = "aws.lambda.enhanced.asm.invocations"; pub const ENHANCED_METRICS_ENV_VAR: &str = "DD_ENHANCED_METRICS"; diff --git a/bottlecap/src/metrics/enhanced/lambda.rs 
b/bottlecap/src/metrics/enhanced/lambda.rs index 0db917f31..657e57442 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -1,10 +1,12 @@ use super::constants::{self, BASE_LAMBDA_INVOCATION_PRICE}; +use crate::proc::{self, NetworkData}; use crate::telemetry::events::ReportMetrics; use dogstatsd::aggregator::Aggregator; use dogstatsd::metric; use dogstatsd::metric::{Metric, MetricValue}; use std::env::consts::ARCH; use std::sync::{Arc, Mutex}; +use tracing::debug; use tracing::error; pub struct Lambda { @@ -105,6 +107,65 @@ impl Lambda { } } + pub(crate) fn generate_network_enhanced_metrics( + network_data_offset: NetworkData, + network_data_end: NetworkData, + aggr: &mut std::sync::MutexGuard, + ) { + let rx_bytes = network_data_end.rx_bytes - network_data_offset.rx_bytes; + let tx_bytes = network_data_end.tx_bytes - network_data_offset.tx_bytes; + let total_network = rx_bytes + tx_bytes; + + let metric = Metric::new( + constants::RX_BYTES_METRIC.into(), + MetricValue::distribution(rx_bytes), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert rx_bytes metric: {}", e); + } + + let metric = Metric::new( + constants::TX_BYTES_METRIC.into(), + MetricValue::distribution(tx_bytes), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert tx_bytes metric: {}", e); + } + + let metric = Metric::new( + constants::TOTAL_NETWORK_METRIC.into(), + MetricValue::distribution(total_network), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert total_network metric: {}", e); + } + } + + pub fn set_network_enhanced_metrics(&self, network_offset: Option) { + if !self.config.enhanced_metrics { + return; + } + + if let Some(offset) = network_offset { + let mut aggr: std::sync::MutexGuard = + self.aggregator.lock().expect("lock poisoned"); + + match proc::get_network_data() { + Ok(data) => { + Self::generate_network_enhanced_metrics(offset, data, &mut aggr); + 
} + Err(_e) => { + debug!("Could not find data to generate network enhanced metrics"); + } + } + } else { + debug!("Could not find data to generate network enhanced metrics"); + } + } + fn calculate_estimated_cost_usd(billed_duration_ms: u64, memory_size_mb: u64) -> f64 { let gb_seconds = (billed_duration_ms as f64 * constants::MS_TO_SEC) * (memory_size_mb as f64 / constants::MB_TO_GB); @@ -307,4 +368,29 @@ mod tests { assert_sketch(&metrics_aggr, constants::MAX_MEMORY_USED_METRIC, 128.0); assert_sketch(&metrics_aggr, constants::MEMORY_SIZE_METRIC, 256.0); } + + #[test] + fn test_set_network_enhanced_metrics() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let network_offset = NetworkData { + rx_bytes: 180.0, + tx_bytes: 254.0, + }; + let network_data = NetworkData { + rx_bytes: 20180.0, + tx_bytes: 75000.0, + }; + + Lambda::generate_network_enhanced_metrics( + network_offset, + network_data, + &mut lambda.aggregator.lock().expect("lock poisoned"), + ); + + assert_sketch(&metrics_aggr, constants::RX_BYTES_METRIC, 20000.0); + assert_sketch(&metrics_aggr, constants::TX_BYTES_METRIC, 74746.0); + assert_sketch(&metrics_aggr, constants::TOTAL_NETWORK_METRIC, 94746.0); + } } diff --git a/bottlecap/src/proc/constants.rs b/bottlecap/src/proc/constants.rs new file mode 100644 index 000000000..be7986e53 --- /dev/null +++ b/bottlecap/src/proc/constants.rs @@ -0,0 +1,3 @@ +pub const PROC_NET_DEV_PATH: &str = "/proc/net/dev"; + +pub const LAMDBA_NETWORK_INTERFACE: &str = "vinternal_1"; diff --git a/bottlecap/src/proc/mod.rs b/bottlecap/src/proc/mod.rs new file mode 100644 index 000000000..c33a2984e --- /dev/null +++ b/bottlecap/src/proc/mod.rs @@ -0,0 +1,92 @@ +pub mod constants; + +use std::{ + fs::File, + io::{self, BufRead}, +}; + +use constants::{LAMDBA_NETWORK_INTERFACE, PROC_NET_DEV_PATH}; + +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct NetworkData { + pub rx_bytes: f64, + pub tx_bytes: f64, +} + +pub 
fn get_network_data() -> Result { + get_network_data_from_path(PROC_NET_DEV_PATH) +} + +fn get_network_data_from_path(path: &str) -> Result { + let file = File::open(path)?; + let reader = io::BufReader::new(file); + + for line in reader.lines() { + let line = line?; + let mut values = line.split_whitespace(); + + if values.next().map_or(false, |interface_name| { + interface_name.starts_with(LAMDBA_NETWORK_INTERFACE) + }) { + // Read the value for received bytes if present + let rx_bytes: Option = values.next().and_then(|s| s.parse().ok()); + + // Skip over the next 7 values representing metrics for received data and + // read the value for bytes transmitted if present + let tx_bytes: Option = values.nth(7).and_then(|s| s.parse().ok()); + + match (rx_bytes, tx_bytes) { + (Some(rx_val), Some(tx_val)) => { + return Ok(NetworkData { + rx_bytes: rx_val, + tx_bytes: tx_val, + }) + } + (_, _) => { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "Network data not found", + )) + } + } + } + } + + Err(io::Error::new( + io::ErrorKind::NotFound, + "Network data not found", + )) +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + + #[test] + #[allow(clippy::float_cmp)] + fn test_get_network_data() { + let path = "./tests/proc/net/valid_dev"; + let network_data_result = get_network_data_from_path(&path); + assert!(!network_data_result.is_err()); + let network_data_result = network_data_result.unwrap(); + assert_eq!(network_data_result.rx_bytes, 180.0); + assert_eq!(network_data_result.tx_bytes, 254.0); + + let path = "./tests/proc/net/invalid_dev_malformed"; + let network_data_result = get_network_data_from_path(&path); + assert!(network_data_result.is_err()); + + let path = "./tests/proc/net/invalid_dev_non_numerical_value"; + let network_data_result = get_network_data_from_path(&path); + assert!(network_data_result.is_err()); + + let path = "./tests/proc/net/missing_interface_dev"; + let network_data_result = 
get_network_data_from_path(&path); + assert!(network_data_result.is_err()); + + let path = "./tests/proc/net/nonexistent_dev"; + let network_data_result = get_network_data_from_path(&path); + assert!(network_data_result.is_err()); + } +} diff --git a/bottlecap/tests/proc/net/invalid_dev_malformed b/bottlecap/tests/proc/net/invalid_dev_malformed new file mode 100644 index 000000000..5cd9f0ec9 --- /dev/null +++ b/bottlecap/tests/proc/net/invalid_dev_malformed @@ -0,0 +1,5 @@ +Inter-| Receive | Transmit +face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed +lo: 7490 63 0 0 0 0 0 0 7490 63 0 0 0 0 0 0 +vinternal_1: 180 3 0 0 0 ... +telemetry1_sb: 17284 50 0 0 0 0 0 0 15279 78 0 0 0 0 0 0 \ No newline at end of file diff --git a/bottlecap/tests/proc/net/invalid_dev_non_numerical_value b/bottlecap/tests/proc/net/invalid_dev_non_numerical_value new file mode 100644 index 000000000..9aae3404e --- /dev/null +++ b/bottlecap/tests/proc/net/invalid_dev_non_numerical_value @@ -0,0 +1,5 @@ +Inter-| Receive | Transmit +face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed +lo: 7490 63 0 0 0 0 0 0 7490 63 0 0 0 0 0 0 +vinternal_1: INVALID 3 0 0 0 0 0 0 254 4 0 0 0 0 0 0 +telemetry1_sb: 17284 50 0 0 0 0 0 0 15279 78 0 0 0 0 0 0 \ No newline at end of file diff --git a/bottlecap/tests/proc/net/missing_interface_dev b/bottlecap/tests/proc/net/missing_interface_dev new file mode 100644 index 000000000..fb4a0224d --- /dev/null +++ b/bottlecap/tests/proc/net/missing_interface_dev @@ -0,0 +1,4 @@ +Inter-| Receive | Transmit +face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed +lo: 7490 63 0 0 0 0 0 0 7490 63 0 0 0 0 0 0 +telemetry1_sb: 17284 50 0 0 0 0 0 0 15279 78 0 0 0 0 0 0 \ No newline at end of file diff --git a/bottlecap/tests/proc/net/valid_dev b/bottlecap/tests/proc/net/valid_dev new file mode 
100644 index 000000000..a20f0cc97 --- /dev/null +++ b/bottlecap/tests/proc/net/valid_dev @@ -0,0 +1,5 @@ +Inter-| Receive | Transmit +face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed +lo: 7490 63 0 0 0 0 0 0 7490 63 0 0 0 0 0 0 +vinternal_1: 180 3 0 0 0 0 0 0 254 4 0 0 0 0 0 0 +telemetry1_sb: 17284 50 0 0 0 0 0 0 15279 78 0 0 0 0 0 0 From 52c4749ce436a1005e3239485074c7ae4d60ece0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:21:49 -0400 Subject: [PATCH 12/41] generate a random id with `OsRng` instead of `thread_rng` on `SnapStart` lambdas (#427) --- bottlecap/src/lifecycle/invocation/span_inferrer.rs | 8 ++++++-- bottlecap/src/tags/lambda/tags.rs | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 6141d8dd2..70f86d7d9 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use datadog_trace_protobuf::pb::Span; -use rand::Rng; +use rand::{rngs::OsRng, Rng, RngCore}; use serde_json::Value; use tracing::debug; @@ -11,6 +11,7 @@ use crate::lifecycle::invocation::triggers::{ api_gateway_http_event::APIGatewayHttpEvent, api_gateway_rest_event::APIGatewayRestEvent, Trigger, }; +use crate::tags::lambda::tags::{INIT_TYPE, SNAP_START_VALUE}; const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; const FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG: &str = "function_trigger.event_source_arn"; @@ -133,7 +134,10 @@ impl SpanInferrer { } fn generate_span_id() -> u64 { - // todo: secure random id with OsRng for SnapStart + if std::env::var(INIT_TYPE).map_or(false, |it| it == SNAP_START_VALUE) { + return OsRng.next_u64(); + } + let mut rng = rand::thread_rng(); 
rng.gen() } diff --git a/bottlecap/src/tags/lambda/tags.rs b/bottlecap/src/tags/lambda/tags.rs index 7e6ce54e0..ad1fb0c80 100644 --- a/bottlecap/src/tags/lambda/tags.rs +++ b/bottlecap/src/tags/lambda/tags.rs @@ -10,8 +10,10 @@ use tracing::debug; const QUALIFIER_ENV_VAR: &str = "AWS_LAMBDA_FUNCTION_VERSION"; const RUNTIME_VAR: &str = "AWS_EXECUTION_ENV"; const MEMORY_SIZE_VAR: &str = "AWS_LAMBDA_FUNCTION_MEMORY_SIZE"; -const INIT_TYPE: &str = "AWS_LAMBDA_INITIALIZATION_TYPE"; +pub const INIT_TYPE: &str = "AWS_LAMBDA_INITIALIZATION_TYPE"; const INIT_TYPE_KEY: &str = "init_type"; +// Value for INIT_TYPE when the function is using SnapStart +pub const SNAP_START_VALUE: &str = "snap-start"; // FunctionARNKey is the tag key for a function's arn pub const FUNCTION_ARN_KEY: &str = "function_arn"; From 5fefb6319d45c6d4b8918394214b90e7bb476778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Wed, 30 Oct 2024 17:45:49 +0000 Subject: [PATCH 13/41] fix(bottlecap): add trigger tags to invocation span (#428) * use `get_tags` from `Trigger` trait * remove unneeded comment * add trigger tags to invocation span --- .../src/lifecycle/invocation/processor.rs | 13 ++++++++----- .../src/lifecycle/invocation/span_inferrer.rs | 18 ++++++++++++------ .../triggers/api_gateway_rest_event.rs | 3 --- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 112791e9a..aa9d3b3fb 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -126,16 +126,19 @@ impl Processor { // - error.msg // - error.type // - error.stack - // - trigger tags (from inferred spans) // - metrics tags (for asm) } + if let Some(trigger_tags) = self.inferrer.get_trigger_tags() { + self.span.meta.extend(trigger_tags); + } + self.inferrer.complete_inferred_span(&self.span); if 
self.tracer_detected { let mut body_size = std::mem::size_of_val(&self.span); let mut traces = vec![self.span.clone()]; - if let Some(inferred_span) = self.inferrer.get_inferred_span() { + if let Some(inferred_span) = &self.inferrer.inferred_span { body_size += std::mem::size_of_val(inferred_span); traces.push(inferred_span.clone()); } @@ -215,7 +218,7 @@ impl Processor { // Set the right data to the correct root level span, // If there's an inferred span, then that should be the root. - if self.inferrer.get_inferred_span().is_some() { + if self.inferrer.inferred_span.is_some() { self.inferrer.set_parent_id(sc.span_id); self.inferrer.extend_meta(sc.tags.clone()); } else { @@ -223,7 +226,7 @@ impl Processor { } } - if let Some(inferred_span) = self.inferrer.get_inferred_span() { + if let Some(inferred_span) = &self.inferrer.inferred_span { self.span.parent_id = inferred_span.span_id; } } @@ -267,7 +270,7 @@ impl Processor { self.span.trace_id = trace_id; self.span.span_id = span_id; - if self.inferrer.get_inferred_span().is_some() { + if self.inferrer.inferred_span.is_some() { if let Some(status_code) = status_code { self.inferrer.set_status_code(status_code); } diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 70f86d7d9..b63f2a82d 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -17,9 +17,10 @@ const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; const FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG: &str = "function_trigger.event_source_arn"; pub struct SpanInferrer { - inferred_span: Option, + pub inferred_span: Option, is_async_span: bool, carrier: Option>, + trigger_tags: Option>, } impl Default for SpanInferrer { @@ -35,6 +36,7 @@ impl SpanInferrer { inferred_span: None, is_async_span: false, carrier: None, + trigger_tags: None, } } @@ -64,6 +66,7 @@ impl SpanInferrer { ]); self.carrier = 
Some(t.get_carrier()); + self.trigger_tags = Some(t.get_tags()); self.is_async_span = t.is_async(); self.inferred_span = Some(span); } @@ -87,6 +90,7 @@ impl SpanInferrer { ]); self.carrier = Some(t.get_carrier()); + self.trigger_tags = Some(t.get_tags()); self.is_async_span = t.is_async(); self.inferred_span = Some(span); } @@ -142,15 +146,17 @@ impl SpanInferrer { rng.gen() } - /// Returns a reference to the inner `self.inferred_span` + /// Returns a clone of the carrier associated with the inferred span /// #[must_use] - pub fn get_inferred_span(&self) -> &Option { - &self.inferred_span + pub fn get_carrier(&self) -> Option> { + self.carrier.clone() } + /// Returns a clone of the tags associated with the inferred span + /// #[must_use] - pub fn get_carrier(&self) -> Option> { - self.carrier.clone() + pub fn get_trigger_tags(&self) -> Option> { + self.trigger_tags.clone() } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs index 2ae79c40a..d2588f37a 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs @@ -115,9 +115,6 @@ impl Trigger for APIGatewayRestEvent { self.request_context.resource_path.clone(), ), ])); - - debug!("Enriched Span: {:?}", span); - // todo: update global(? 
IsAsync if event payload is `Event` } fn get_tags(&self) -> HashMap { From 2024f005af6ad701492bceb3786751c1eee6a6b0 Mon Sep 17 00:00:00 2001 From: shreyamalpani Date: Tue, 5 Nov 2024 13:19:23 -0500 Subject: [PATCH 14/41] Refactor context creation (#433) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * create context on invoke event * update tests * clippy fixes * remove `allow(clippy::ptr_arg)` --------- Co-authored-by: jordan gonzález <30836115+duncanista@users.noreply.github.com> --- bottlecap/src/lifecycle/invocation/context.rs | 74 ++++--------------- .../src/lifecycle/invocation/processor.rs | 2 + 2 files changed, 16 insertions(+), 60 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs index 9c00dea30..6fef5a673 100644 --- a/bottlecap/src/lifecycle/invocation/context.rs +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -93,8 +93,13 @@ impl ContextBuffer { .find(|context| context.request_id == *request_id) } - /// Adds the init duration to a `Context` in the buffer. If the `Context` is not found, a new - /// `Context` is created and added to the buffer. + /// Creates a new `Context` and adds it to the buffer. + /// + pub fn create_context(&mut self, request_id: String) { + self.insert(Context::new(request_id, 0.0, 0.0, 0, None)); + } + + /// Adds the init duration to a `Context` in the buffer. /// pub fn add_init_duration(&mut self, request_id: &String, init_duration_ms: f64) { if let Some(context) = self @@ -104,18 +109,11 @@ impl ContextBuffer { { context.init_duration_ms = init_duration_ms; } else { - self.insert(Context::new( - request_id.clone(), - 0.0, - init_duration_ms, - 0, - None, - )); + debug!("Could not add init duration - context not found"); } } - /// Adds the start time to a `Context` in the buffer. If the `Context` is not found, a new - /// `Context` is created and added to the buffer. 
+ /// Adds the start time to a `Context` in the buffer. /// pub fn add_start_time(&mut self, request_id: &String, start_time: i64) { if let Some(context) = self @@ -125,12 +123,11 @@ impl ContextBuffer { { context.start_time = start_time; } else { - self.insert(Context::new(request_id.clone(), 0.0, 0.0, start_time, None)); + debug!("Could not add start time - context not found"); } } - /// Adds the runtime duration to a `Context` in the buffer. If the `Context` is not found, a new - /// `Context` is created and added to the buffer. + /// Adds the runtime duration to a `Context` in the buffer. /// pub fn add_runtime_duration(&mut self, request_id: &String, runtime_duration_ms: f64) { if let Some(context) = self @@ -140,18 +137,11 @@ impl ContextBuffer { { context.runtime_duration_ms = runtime_duration_ms; } else { - self.insert(Context::new( - request_id.clone(), - runtime_duration_ms, - 0.0, - 0, - None, - )); + debug!("Could not add runtime duration - context not found"); } } - /// Adds the network offset to a `Context` in the buffer. If the `Context` is not found, a new - /// `Context` is created and added to the buffer. + /// Adds the network offset to a `Context` in the buffer. 
/// pub fn add_network_offset(&mut self, request_id: &String, network_data: Option) { if let Some(context) = self @@ -161,7 +151,7 @@ impl ContextBuffer { { context.network_offset = network_data; } else { - self.insert(Context::new(request_id.clone(), 0.0, 0.0, 0, network_data)); + debug!("Could not add network offset - context not found"); } } @@ -265,15 +255,6 @@ mod tests { buffer.add_init_duration(&request_id, 100.0); assert_eq!(buffer.get(&request_id).unwrap().init_duration_ms, 100.0); - - // Add init duration to a context that doesn't exist - let unexistent_request_id = String::from("unexistent"); - buffer.add_init_duration(&unexistent_request_id, 200.0); - assert_eq!(buffer.size(), 2); - assert_eq!( - buffer.get(&unexistent_request_id).unwrap().init_duration_ms, - 200.0 - ); } #[test] @@ -288,12 +269,6 @@ mod tests { buffer.add_start_time(&request_id, 100); assert_eq!(buffer.get(&request_id).unwrap().start_time, 100); - - // Add start time to a context that doesn't exist - let unexistent_request_id = String::from("unexistent"); - buffer.add_start_time(&unexistent_request_id, 200); - assert_eq!(buffer.size(), 2); - assert_eq!(buffer.get(&unexistent_request_id).unwrap().start_time, 200); } #[test] @@ -308,18 +283,6 @@ mod tests { buffer.add_runtime_duration(&request_id, 100.0); assert_eq!(buffer.get(&request_id).unwrap().runtime_duration_ms, 100.0); - - // Add runtime duration to a context that doesn't exist - let unexistent_request_id = String::from("unexistent"); - buffer.add_runtime_duration(&unexistent_request_id, 200.0); - assert_eq!(buffer.size(), 2); - assert_eq!( - buffer - .get(&unexistent_request_id) - .unwrap() - .runtime_duration_ms, - 200.0 - ); } #[test] @@ -342,14 +305,5 @@ mod tests { buffer.get(&request_id).unwrap().network_offset, network_offset, ); - - // Add network offset to a context that doesn't exist - let unexistent_request_id = String::from("unexistent"); - buffer.add_network_offset(&unexistent_request_id, network_offset); - 
assert_eq!(buffer.size(), 2); - assert_eq!( - buffer.get(&unexistent_request_id).unwrap().network_offset, - network_offset - ); } } diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index aa9d3b3fb..82a06724b 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -79,6 +79,8 @@ impl Processor { /// Given a `request_id`, add the enhanced metric offsets to the context buffer. /// pub fn on_invoke_event(&mut self, request_id: String) { + self.context_buffer.create_context(request_id.clone()); + let network_offset: Option = proc::get_network_data().ok(); self.context_buffer .add_network_offset(&request_id, network_offset); From 12d7a9599aae35be5b378f554d4a79b26246de84 Mon Sep 17 00:00:00 2001 From: shreyamalpani Date: Wed, 6 Nov 2024 11:26:35 -0500 Subject: [PATCH 15/41] Generate CPU Enhanced Metrics (#430) * send cpu metrics * clippy fixes * fixes * set utilization metrics before flushing & format fixes * added comment to explain utilization metrics calculation timing * use nix instead of libc to get system clock * update LICENSE-3rdparty.yml * added comments to explain calculations * clippy --- bottlecap/Cargo.lock | 30 +- bottlecap/Cargo.toml | 1 + bottlecap/LICENSE-3rdparty.yml | 62 ++++ bottlecap/src/bin/bottlecap/main.rs | 21 +- bottlecap/src/lifecycle/invocation/context.rs | 45 ++- .../src/lifecycle/invocation/processor.rs | 44 ++- bottlecap/src/metrics/enhanced/constants.rs | 8 + bottlecap/src/metrics/enhanced/lambda.rs | 304 +++++++++++++++++- bottlecap/src/proc/clock.rs | 20 ++ bottlecap/src/proc/constants.rs | 2 + bottlecap/src/proc/mod.rs | 197 +++++++++++- .../stat/invalid_stat_malformed_first_line | 2 + .../stat/invalid_stat_malformed_per_cpu_line | 10 + .../proc/stat/invalid_stat_missing_cpun_data | 8 + .../stat/invalid_stat_non_numerical_value_1 | 2 + .../stat/invalid_stat_non_numerical_value_2 | 2 + 
bottlecap/tests/proc/stat/valid_stat | 10 + .../tests/proc/uptime/invalid_data_uptime | 1 + bottlecap/tests/proc/uptime/malformed_uptime | 1 + bottlecap/tests/proc/uptime/valid_uptime | 1 + 20 files changed, 733 insertions(+), 38 deletions(-) create mode 100644 bottlecap/src/proc/clock.rs create mode 100644 bottlecap/tests/proc/stat/invalid_stat_malformed_first_line create mode 100644 bottlecap/tests/proc/stat/invalid_stat_malformed_per_cpu_line create mode 100644 bottlecap/tests/proc/stat/invalid_stat_missing_cpun_data create mode 100644 bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_1 create mode 100644 bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_2 create mode 100644 bottlecap/tests/proc/stat/valid_stat create mode 100644 bottlecap/tests/proc/uptime/invalid_data_uptime create mode 100644 bottlecap/tests/proc/uptime/malformed_uptime create mode 100644 bottlecap/tests/proc/uptime/valid_uptime diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index f96fd4c7a..c61ae2695 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -344,7 +344,7 @@ version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ - "bitflags", + "bitflags 2.6.0", "cexpr", "clang-sys", "itertools 0.12.1", @@ -376,6 +376,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.6.0" @@ -427,6 +433,7 @@ dependencies = [ "hyper 0.14.30", "lazy_static", "log", + "nix", "proptest", "protobuf", "rand", @@ -1575,7 +1582,7 @@ version = "0.1.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags", + "bitflags 2.6.0", "libc", ] @@ -1670,6 +1677,17 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -1931,7 +1949,7 @@ checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" dependencies = [ "bit-set", "bit-vec", - "bitflags", + "bitflags 2.6.0", "lazy_static", "num-traits", "rand", @@ -2179,7 +2197,7 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags", + "bitflags 2.6.0", ] [[package]] @@ -2366,7 +2384,7 @@ version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ - "bitflags", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -2545,7 +2563,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags", + "bitflags 2.6.0", "core-foundation", "core-foundation-sys", "libc", diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index 09b3c0e13..efe1cbb11 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -20,6 +20,7 @@ figment = { version = "0.10", default-features = false, features = ["yaml", "env hyper = { version = "0.14", default-features = false, features 
= ["server"] } lazy_static = { version = "1.5", default-features = false } log = { version = "0.4", default-features = false } +nix = { version = "0.26", default-features = false, features = ["feature"] } protobuf = { version = "3.5", default-features = false } regex = { version = "1.10", default-features = false } reqwest = { version = "0.12", features = ["json", "http2", "rustls-tls"], default-features = false } diff --git a/bottlecap/LICENSE-3rdparty.yml b/bottlecap/LICENSE-3rdparty.yml index 83b5f497a..e20352a0e 100644 --- a/bottlecap/LICENSE-3rdparty.yml +++ b/bottlecap/LICENSE-3rdparty.yml @@ -2665,6 +2665,40 @@ third_party_libraries: THE SOFTWARE. - license: Apache-2.0 text: " Apache License\n Version 2.0, January 2004\n http://www.apache.org/licenses/\n\nTERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n1. Definitions.\n\n \"License\" shall mean the terms and conditions for use, reproduction,\n and distribution as defined by Sections 1 through 9 of this document.\n\n \"Licensor\" shall mean the copyright owner or entity authorized by\n the copyright owner that is granting the License.\n\n \"Legal Entity\" shall mean the union of the acting entity and all\n other entities that control, are controlled by, or are under common\n control with that entity. 
For the purposes of this definition,\n \"control\" means (i) the power, direct or indirect, to cause the\n direction or management of such entity, whether by contract or\n otherwise, or (ii) ownership of fifty percent (50%) or more of the\n outstanding shares, or (iii) beneficial ownership of such entity.\n\n \"You\" (or \"Your\") shall mean an individual or Legal Entity\n exercising permissions granted by this License.\n\n \"Source\" form shall mean the preferred form for making modifications,\n including but not limited to software source code, documentation\n source, and configuration files.\n\n \"Object\" form shall mean any form resulting from mechanical\n transformation or translation of a Source form, including but\n not limited to compiled object code, generated documentation,\n and conversions to other media types.\n\n \"Work\" shall mean the work of authorship, whether in Source or\n Object form, made available under the License, as indicated by a\n copyright notice that is included in or attached to the work\n (an example is provided in the Appendix below).\n\n \"Derivative Works\" shall mean any work, whether in Source or Object\n form, that is based on (or derived from) the Work and for which the\n editorial revisions, annotations, elaborations, or other modifications\n represent, as a whole, an original work of authorship. For the purposes\n of this License, Derivative Works shall not include works that remain\n separable from, or merely link (or bind by name) to the interfaces of,\n the Work and Derivative Works thereof.\n\n \"Contribution\" shall mean any work of authorship, including\n the original version of the Work and any modifications or additions\n to that Work or Derivative Works thereof, that is intentionally\n submitted to Licensor for inclusion in the Work by the copyright owner\n or by an individual or Legal Entity authorized to submit on behalf of\n the copyright owner. 
For the purposes of this definition, \"submitted\"\n means any form of electronic, verbal, or written communication sent\n to the Licensor or its representatives, including but not limited to\n communication on electronic mailing lists, source code control systems,\n and issue tracking systems that are managed by, or on behalf of, the\n Licensor for the purpose of discussing and improving the Work, but\n excluding communication that is conspicuously marked or otherwise\n designated in writing by the copyright owner as \"Not a Contribution.\"\n\n \"Contributor\" shall mean Licensor and any individual or Legal Entity\n on behalf of whom a Contribution has been received by Licensor and\n subsequently incorporated within the Work.\n\n2. Grant of Copyright License. Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n copyright license to reproduce, prepare Derivative Works of,\n publicly display, publicly perform, sublicense, and distribute the\n Work and such Derivative Works in Source or Object form.\n\n3. Grant of Patent License. Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n (except as stated in this section) patent license to make, have made,\n use, offer to sell, sell, import, and otherwise transfer the Work,\n where such license applies only to those patent claims licensable\n by such Contributor that are necessarily infringed by their\n Contribution(s) alone or by combination of their Contribution(s)\n with the Work to which such Contribution(s) was submitted. 
If You\n institute patent litigation against any entity (including a\n cross-claim or counterclaim in a lawsuit) alleging that the Work\n or a Contribution incorporated within the Work constitutes direct\n or contributory patent infringement, then any patent licenses\n granted to You under this License for that Work shall terminate\n as of the date such litigation is filed.\n\n4. Redistribution. You may reproduce and distribute copies of the\n Work or Derivative Works thereof in any medium, with or without\n modifications, and in Source or Object form, provided that You\n meet the following conditions:\n\n (a) You must give any other recipients of the Work or\n Derivative Works a copy of this License; and\n\n (b) You must cause any modified files to carry prominent notices\n stating that You changed the files; and\n\n (c) You must retain, in the Source form of any Derivative Works\n that You distribute, all copyright, patent, trademark, and\n attribution notices from the Source form of the Work,\n excluding those notices that do not pertain to any part of\n the Derivative Works; and\n\n (d) If the Work includes a \"NOTICE\" text file as part of its\n distribution, then any Derivative Works that You distribute must\n include a readable copy of the attribution notices contained\n within such NOTICE file, excluding those notices that do not\n pertain to any part of the Derivative Works, in at least one\n of the following places: within a NOTICE text file distributed\n as part of the Derivative Works; within the Source form or\n documentation, if provided along with the Derivative Works; or,\n within a display generated by the Derivative Works, if and\n wherever such third-party notices normally appear. The contents\n of the NOTICE file are for informational purposes only and\n do not modify the License. 
You may add Your own attribution\n notices within Derivative Works that You distribute, alongside\n or as an addendum to the NOTICE text from the Work, provided\n that such additional attribution notices cannot be construed\n as modifying the License.\n\n You may add Your own copyright statement to Your modifications and\n may provide additional or different license terms and conditions\n for use, reproduction, or distribution of Your modifications, or\n for any such Derivative Works as a whole, provided Your use,\n reproduction, and distribution of the Work otherwise complies with\n the conditions stated in this License.\n\n5. Submission of Contributions. Unless You explicitly state otherwise,\n any Contribution intentionally submitted for inclusion in the Work\n by You to the Licensor shall be under the terms and conditions of\n this License, without any additional terms or conditions.\n Notwithstanding the above, nothing herein shall supersede or modify\n the terms of any separate license agreement you may have executed\n with Licensor regarding such Contributions.\n\n6. Trademarks. This License does not grant permission to use the trade\n names, trademarks, service marks, or product names of the Licensor,\n except as required for reasonable and customary use in describing the\n origin of the Work and reproducing the content of the NOTICE file.\n\n7. Disclaimer of Warranty. Unless required by applicable law or\n agreed to in writing, Licensor provides the Work (and each\n Contributor provides its Contributions) on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n implied, including, without limitation, any warranties or conditions\n of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n PARTICULAR PURPOSE. You are solely responsible for determining the\n appropriateness of using or redistributing the Work and assume any\n risks associated with Your exercise of permissions under this License.\n\n8. 
Limitation of Liability. In no event and under no legal theory,\n whether in tort (including negligence), contract, or otherwise,\n unless required by applicable law (such as deliberate and grossly\n negligent acts) or agreed to in writing, shall any Contributor be\n liable to You for damages, including any direct, indirect, special,\n incidental, or consequential damages of any character arising as a\n result of this License or out of the use or inability to use the\n Work (including but not limited to damages for loss of goodwill,\n work stoppage, computer failure or malfunction, or any and all\n other commercial damages or losses), even if such Contributor\n has been advised of the possibility of such damages.\n\n9. Accepting Warranty or Additional Liability. While redistributing\n the Work or Derivative Works thereof, You may choose to offer,\n and charge a fee for, acceptance of support, warranty, indemnity,\n or other liability obligations and/or rights consistent with this\n License. However, in accepting such obligations, You may act only\n on Your own behalf and on Your sole responsibility, not on behalf\n of any other Contributor, and only if You agree to indemnify,\n defend, and hold each Contributor harmless for any liability\n incurred by, or claims asserted against, such Contributor by reason\n of your accepting any such warranty or additional liability.\n\nEND OF TERMS AND CONDITIONS\n\nAPPENDIX: How to apply the Apache License to your work.\n\n To apply the Apache License to your work, attach the following\n boilerplate notice, with the fields enclosed by brackets \"[]\"\n replaced with your own identifying information. (Don't include\n the brackets!) The text should be enclosed in the appropriate\n comment syntax for the file format. 
We also recommend that a\n file or class name and description of purpose be included on the\n same \"printed page\" as the copyright notice for easier\n identification within third-party archives.\n\nCopyright [yyyy] [name of copyright owner]\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n" +- package_name: bitflags + package_version: 1.3.2 + repository: https://github.com/bitflags/bitflags + license: MIT/Apache-2.0 + licenses: + - license: MIT + text: | + Copyright (c) 2014 The Rust Project Developers + + Permission is hereby granted, free of charge, to any + person obtaining a copy of this software and associated + documentation files (the "Software"), to deal in the + Software without restriction, including without + limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of + the Software, and to permit persons to whom the Software + is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice + shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT + SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + - license: Apache-2.0 + text: " Apache License\n Version 2.0, January 2004\n http://www.apache.org/licenses/\n\nTERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n1. Definitions.\n\n \"License\" shall mean the terms and conditions for use, reproduction,\n and distribution as defined by Sections 1 through 9 of this document.\n\n \"Licensor\" shall mean the copyright owner or entity authorized by\n the copyright owner that is granting the License.\n\n \"Legal Entity\" shall mean the union of the acting entity and all\n other entities that control, are controlled by, or are under common\n control with that entity. For the purposes of this definition,\n \"control\" means (i) the power, direct or indirect, to cause the\n direction or management of such entity, whether by contract or\n otherwise, or (ii) ownership of fifty percent (50%) or more of the\n outstanding shares, or (iii) beneficial ownership of such entity.\n\n \"You\" (or \"Your\") shall mean an individual or Legal Entity\n exercising permissions granted by this License.\n\n \"Source\" form shall mean the preferred form for making modifications,\n including but not limited to software source code, documentation\n source, and configuration files.\n\n \"Object\" form shall mean any form resulting from mechanical\n transformation or translation of a Source form, including but\n not limited to compiled object code, generated documentation,\n and conversions to other media types.\n\n \"Work\" shall mean the work of authorship, whether in Source or\n Object form, made available under the License, as indicated by a\n copyright notice that is included in or attached to the work\n (an example is provided in the Appendix below).\n\n 
\"Derivative Works\" shall mean any work, whether in Source or Object\n form, that is based on (or derived from) the Work and for which the\n editorial revisions, annotations, elaborations, or other modifications\n represent, as a whole, an original work of authorship. For the purposes\n of this License, Derivative Works shall not include works that remain\n separable from, or merely link (or bind by name) to the interfaces of,\n the Work and Derivative Works thereof.\n\n \"Contribution\" shall mean any work of authorship, including\n the original version of the Work and any modifications or additions\n to that Work or Derivative Works thereof, that is intentionally\n submitted to Licensor for inclusion in the Work by the copyright owner\n or by an individual or Legal Entity authorized to submit on behalf of\n the copyright owner. For the purposes of this definition, \"submitted\"\n means any form of electronic, verbal, or written communication sent\n to the Licensor or its representatives, including but not limited to\n communication on electronic mailing lists, source code control systems,\n and issue tracking systems that are managed by, or on behalf of, the\n Licensor for the purpose of discussing and improving the Work, but\n excluding communication that is conspicuously marked or otherwise\n designated in writing by the copyright owner as \"Not a Contribution.\"\n\n \"Contributor\" shall mean Licensor and any individual or Legal Entity\n on behalf of whom a Contribution has been received by Licensor and\n subsequently incorporated within the Work.\n\n2. Grant of Copyright License. Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n copyright license to reproduce, prepare Derivative Works of,\n publicly display, publicly perform, sublicense, and distribute the\n Work and such Derivative Works in Source or Object form.\n\n3. 
Grant of Patent License. Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n (except as stated in this section) patent license to make, have made,\n use, offer to sell, sell, import, and otherwise transfer the Work,\n where such license applies only to those patent claims licensable\n by such Contributor that are necessarily infringed by their\n Contribution(s) alone or by combination of their Contribution(s)\n with the Work to which such Contribution(s) was submitted. If You\n institute patent litigation against any entity (including a\n cross-claim or counterclaim in a lawsuit) alleging that the Work\n or a Contribution incorporated within the Work constitutes direct\n or contributory patent infringement, then any patent licenses\n granted to You under this License for that Work shall terminate\n as of the date such litigation is filed.\n\n4. Redistribution. You may reproduce and distribute copies of the\n Work or Derivative Works thereof in any medium, with or without\n modifications, and in Source or Object form, provided that You\n meet the following conditions:\n\n (a) You must give any other recipients of the Work or\n Derivative Works a copy of this License; and\n\n (b) You must cause any modified files to carry prominent notices\n stating that You changed the files; and\n\n (c) You must retain, in the Source form of any Derivative Works\n that You distribute, all copyright, patent, trademark, and\n attribution notices from the Source form of the Work,\n excluding those notices that do not pertain to any part of\n the Derivative Works; and\n\n (d) If the Work includes a \"NOTICE\" text file as part of its\n distribution, then any Derivative Works that You distribute must\n include a readable copy of the attribution notices contained\n within such NOTICE file, excluding those notices that do not\n pertain to any part of the Derivative 
Works, in at least one\n of the following places: within a NOTICE text file distributed\n as part of the Derivative Works; within the Source form or\n documentation, if provided along with the Derivative Works; or,\n within a display generated by the Derivative Works, if and\n wherever such third-party notices normally appear. The contents\n of the NOTICE file are for informational purposes only and\n do not modify the License. You may add Your own attribution\n notices within Derivative Works that You distribute, alongside\n or as an addendum to the NOTICE text from the Work, provided\n that such additional attribution notices cannot be construed\n as modifying the License.\n\n You may add Your own copyright statement to Your modifications and\n may provide additional or different license terms and conditions\n for use, reproduction, or distribution of Your modifications, or\n for any such Derivative Works as a whole, provided Your use,\n reproduction, and distribution of the Work otherwise complies with\n the conditions stated in this License.\n\n5. Submission of Contributions. Unless You explicitly state otherwise,\n any Contribution intentionally submitted for inclusion in the Work\n by You to the Licensor shall be under the terms and conditions of\n this License, without any additional terms or conditions.\n Notwithstanding the above, nothing herein shall supersede or modify\n the terms of any separate license agreement you may have executed\n with Licensor regarding such Contributions.\n\n6. Trademarks. This License does not grant permission to use the trade\n names, trademarks, service marks, or product names of the Licensor,\n except as required for reasonable and customary use in describing the\n origin of the Work and reproducing the content of the NOTICE file.\n\n7. Disclaimer of Warranty. 
Unless required by applicable law or\n agreed to in writing, Licensor provides the Work (and each\n Contributor provides its Contributions) on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n implied, including, without limitation, any warranties or conditions\n of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n PARTICULAR PURPOSE. You are solely responsible for determining the\n appropriateness of using or redistributing the Work and assume any\n risks associated with Your exercise of permissions under this License.\n\n8. Limitation of Liability. In no event and under no legal theory,\n whether in tort (including negligence), contract, or otherwise,\n unless required by applicable law (such as deliberate and grossly\n negligent acts) or agreed to in writing, shall any Contributor be\n liable to You for damages, including any direct, indirect, special,\n incidental, or consequential damages of any character arising as a\n result of this License or out of the use or inability to use the\n Work (including but not limited to damages for loss of goodwill,\n work stoppage, computer failure or malfunction, or any and all\n other commercial damages or losses), even if such Contributor\n has been advised of the possibility of such damages.\n\n9. Accepting Warranty or Additional Liability. While redistributing\n the Work or Derivative Works thereof, You may choose to offer,\n and charge a fee for, acceptance of support, warranty, indemnity,\n or other liability obligations and/or rights consistent with this\n License. 
However, in accepting such obligations, You may act only\n on Your own behalf and on Your sole responsibility, not on behalf\n of any other Contributor, and only if You agree to indemnify,\n defend, and hold each Contributor harmless for any liability\n incurred by, or claims asserted against, such Contributor by reason\n of your accepting any such warranty or additional liability.\n\nEND OF TERMS AND CONDITIONS\n\nAPPENDIX: How to apply the Apache License to your work.\n\n To apply the Apache License to your work, attach the following\n boilerplate notice, with the fields enclosed by brackets \"[]\"\n replaced with your own identifying information. (Don't include\n the brackets!) The text should be enclosed in the appropriate\n comment syntax for the file format. We also recommend that a\n file or class name and description of purpose be included on the\n same \"printed page\" as the copyright notice for easier\n identification within third-party archives.\n\nCopyright [yyyy] [name of copyright owner]\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n" - package_name: bitflags package_version: 2.6.0 repository: https://github.com/bitflags/bitflags @@ -10259,6 +10293,34 @@ third_party_libraries: licenses: - license: MIT text: NOT FOUND +- package_name: nix + package_version: 0.26.4 + repository: https://github.com/nix-rust/nix + license: MIT + licenses: + - license: MIT + text: | + The MIT License (MIT) + + Copyright (c) 2015 Carl Lerche + nix-rust Authors + + Permission is hereby 
granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. - package_name: num-traits package_version: 0.2.19 repository: https://github.com/rust-num/num-traits diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index e064cde28..21f6c777a 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -426,8 +426,9 @@ async fn extension_loop_active( .. 
} => { let mut p = invocation_processor.lock().await; + let mut enhanced_metric_data = None; if let Some(metrics) = metrics { - p.on_platform_runtime_done( + enhanced_metric_data = p.on_platform_runtime_done( &request_id, metrics.duration_ms, config.clone(), @@ -451,6 +452,11 @@ async fn extension_loop_active( request_id, status ); + // set cpu utilization metrics here to avoid accounting for extra idle time + if let Some(offsets) = enhanced_metric_data { + lambda_enhanced_metrics.set_cpu_utilization_enhanced_metrics(offsets.cpu_offset, offsets.uptime_offset); + } + // TODO(astuyve) it'll be easy to // pass the invocation deadline to // flush tasks here, so they can @@ -463,6 +469,7 @@ async fn extension_loop_active( stats_flusher.manual_flush() ); } + break; } TelemetryRecord::PlatformReport { @@ -477,11 +484,13 @@ async fn extension_loop_active( ); lambda_enhanced_metrics.set_report_log_metrics(&metrics); let mut p = invocation_processor.lock().await; - if let Some((post_runtime_duration_ms, network_offset)) = p.on_platform_report(&request_id, metrics.duration_ms) { - lambda_enhanced_metrics.set_post_runtime_duration_metric( - post_runtime_duration_ms, - ); - lambda_enhanced_metrics.set_network_enhanced_metrics(network_offset); + let (post_runtime_duration_ms, enhanced_metric_data) = p.on_platform_report(&request_id, metrics.duration_ms); + if let Some(duration) = post_runtime_duration_ms { + lambda_enhanced_metrics.set_post_runtime_duration_metric(duration); + } + if let Some(offsets) = enhanced_metric_data { + lambda_enhanced_metrics.set_network_enhanced_metrics(offsets.network_offset); + lambda_enhanced_metrics.set_cpu_time_enhanced_metrics(offsets.cpu_offset); } drop(p); diff --git a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs index 6fef5a673..325fd3cd3 100644 --- a/bottlecap/src/lifecycle/invocation/context.rs +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -1,4 +1,4 @@ -use crate::proc::NetworkData; 
+use crate::metrics::enhanced::lambda::EnhancedMetricData; use std::collections::VecDeque; use tracing::debug; @@ -9,7 +9,7 @@ pub struct Context { pub runtime_duration_ms: f64, pub init_duration_ms: f64, pub start_time: i64, - pub network_offset: Option, + pub enhanced_metric_data: Option, } impl Context { @@ -19,14 +19,14 @@ impl Context { runtime_duration_ms: f64, init_duration_ms: f64, start_time: i64, - network_offset: Option, + enhanced_metric_data: Option, ) -> Self { Context { request_id, runtime_duration_ms, init_duration_ms, start_time, - network_offset, + enhanced_metric_data, } } } @@ -143,13 +143,17 @@ impl ContextBuffer { /// Adds the network offset to a `Context` in the buffer. /// - pub fn add_network_offset(&mut self, request_id: &String, network_data: Option) { + pub fn add_enhanced_metric_data( + &mut self, + request_id: &String, + enhanced_metric_data: Option, + ) { if let Some(context) = self .buffer .iter_mut() .find(|context| context.request_id == *request_id) { - context.network_offset = network_data; + context.enhanced_metric_data = enhanced_metric_data; } else { debug!("Could not add network offset - context not found"); } @@ -166,6 +170,9 @@ impl ContextBuffer { #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { + use crate::proc::{CPUData, NetworkData}; + use std::collections::HashMap; + use super::*; #[test] @@ -286,7 +293,7 @@ mod tests { } #[test] - fn test_add_network_offset() { + fn test_add_enhanced_metric_data() { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); @@ -300,10 +307,28 @@ mod tests { tx_bytes: 254.0, }); - buffer.add_network_offset(&request_id, network_offset); - assert_eq!( - buffer.get(&request_id).unwrap().network_offset, + let mut individual_cpu_idle_times = HashMap::new(); + individual_cpu_idle_times.insert("cpu0".to_string(), 10.0); + individual_cpu_idle_times.insert("cpu1".to_string(), 20.0); + let cpu_offset = Some(CPUData { + total_user_time_ms: 100.0, + 
total_system_time_ms: 53.0, + total_idle_time_ms: 20.0, + individual_cpu_idle_times: individual_cpu_idle_times, + }); + + let uptime_offset = Some(50.0); + + let enhanced_metric_data = Some(EnhancedMetricData { network_offset, + cpu_offset, + uptime_offset, + }); + + buffer.add_enhanced_metric_data(&request_id, enhanced_metric_data.clone()); + assert_eq!( + buffer.get(&request_id).unwrap().enhanced_metric_data, + enhanced_metric_data, ); } } diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 82a06724b..e7d2925fc 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -14,7 +14,8 @@ use tracing::debug; use crate::{ config::{self, AwsConfig}, lifecycle::invocation::{context::ContextBuffer, span_inferrer::SpanInferrer}, - proc::{self, NetworkData}, + metrics::enhanced::lambda::EnhancedMetricData, + proc::{self, CPUData, NetworkData}, tags::provider, traces::{ context::SpanContext, @@ -34,6 +35,7 @@ pub struct Processor { propagator: DatadogCompositePropagator, aws_config: AwsConfig, tracer_detected: bool, + collect_enhanced_data: bool, } impl Processor { @@ -73,17 +75,26 @@ impl Processor { propagator, aws_config: aws_config.clone(), tracer_detected: false, + collect_enhanced_data: config.enhanced_metrics, } } - /// Given a `request_id`, add the enhanced metric offsets to the context buffer. + /// Given a `request_id`, creates the context and adds the enhanced metric offsets to the context buffer. 
/// pub fn on_invoke_event(&mut self, request_id: String) { self.context_buffer.create_context(request_id.clone()); - - let network_offset: Option = proc::get_network_data().ok(); - self.context_buffer - .add_network_offset(&request_id, network_offset); + if self.collect_enhanced_data { + let network_offset: Option = proc::get_network_data().ok(); + let cpu_offset: Option = proc::get_cpu_data().ok(); + let uptime_offset: Option = proc::get_uptime().ok(); + let enhanced_metric_offsets = Some(EnhancedMetricData { + network_offset, + cpu_offset, + uptime_offset, + }); + self.context_buffer + .add_enhanced_metric_data(&request_id, enhanced_metric_offsets); + } } /// Given a `request_id` and the time of the platform start, add the start time to the context buffer. @@ -110,10 +121,11 @@ impl Processor { tags_provider: Arc, trace_processor: Arc, trace_agent_tx: Sender, - ) { + ) -> Option { self.context_buffer .add_runtime_duration(request_id, duration_ms); + let mut enhanced_metric_data: Option = None; if let Some(context) = self.context_buffer.get(request_id) { let span = &mut self.span; // `round` is intentionally meant to be a whole integer @@ -129,6 +141,8 @@ impl Processor { // - error.type // - error.stack // - metrics tags (for asm) + + enhanced_metric_data.clone_from(&context.enhanced_metric_data); } if let Some(trigger_tags) = self.inferrer.get_trigger_tags() { @@ -169,6 +183,8 @@ impl Processor { debug!("Failed to send invocation span to agent: {e}"); } } + + enhanced_metric_data } /// Given a `request_id` and the duration in milliseconds of the platform report, @@ -181,18 +197,18 @@ impl Processor { &mut self, request_id: &String, duration_ms: f64, - ) -> Option<(f64, Option)> { + ) -> (Option, Option) { if let Some(context) = self.context_buffer.remove(request_id) { - if context.runtime_duration_ms == 0.0 { - return None; - } + let mut post_runtime_duration_ms: Option = None; - let post_runtime_duration_ms = duration_ms - context.runtime_duration_ms; + if 
context.runtime_duration_ms != 0.0 { + post_runtime_duration_ms = Some(duration_ms - context.runtime_duration_ms); + } - return Some((post_runtime_duration_ms, context.network_offset)); + return (post_runtime_duration_ms, context.enhanced_metric_data); } - None + (None, None) } /// If this method is called, it means that we are operating in a Universally Instrumented diff --git a/bottlecap/src/metrics/enhanced/constants.rs b/bottlecap/src/metrics/enhanced/constants.rs index e82f48057..2d17e73ec 100644 --- a/bottlecap/src/metrics/enhanced/constants.rs +++ b/bottlecap/src/metrics/enhanced/constants.rs @@ -24,5 +24,13 @@ pub const INVOCATIONS_METRIC: &str = "aws.lambda.enhanced.invocations"; pub const RX_BYTES_METRIC: &str = "aws.lambda.enhanced.rx_bytes"; pub const TX_BYTES_METRIC: &str = "aws.lambda.enhanced.tx_bytes"; pub const TOTAL_NETWORK_METRIC: &str = "aws.lambda.enhanced.total_network"; +pub const CPU_SYSTEM_TIME_METRIC: &str = "aws.lambda.enhanced.cpu_system_time"; +pub const CPU_USER_TIME_METRIC: &str = "aws.lambda.enhanced.cpu_user_time"; +pub const CPU_TOTAL_TIME_METRIC: &str = "aws.lambda.enhanced.cpu_total_time"; +pub const CPU_TOTAL_UTILIZATION_PCT_METRIC: &str = "aws.lambda.enhanced.cpu_total_utilization_pct"; +pub const CPU_TOTAL_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_total_utilization"; +pub const NUM_CORES_METRIC: &str = "aws.lambda.enhanced.num_cores"; +pub const CPU_MAX_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_max_utilization"; +pub const CPU_MIN_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_min_utilization"; //pub const ASM_INVOCATIONS_METRIC: &str = "aws.lambda.enhanced.asm.invocations"; pub const ENHANCED_METRICS_ENV_VAR: &str = "DD_ENHANCED_METRICS"; diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index 657e57442..13dd203a9 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -1,5 +1,5 @@ use 
super::constants::{self, BASE_LAMBDA_INVOCATION_PRICE}; -use crate::proc::{self, NetworkData}; +use crate::proc::{self, CPUData, NetworkData}; use crate::telemetry::events::ReportMetrics; use dogstatsd::aggregator::Aggregator; use dogstatsd::metric; @@ -166,6 +166,183 @@ impl Lambda { } } + pub(crate) fn generate_cpu_time_enhanced_metrics( + cpu_data_offset: &CPUData, + cpu_data_end: &CPUData, + aggr: &mut std::sync::MutexGuard, + ) { + let cpu_user_time = cpu_data_end.total_user_time_ms - cpu_data_offset.total_user_time_ms; + let cpu_system_time = + cpu_data_end.total_system_time_ms - cpu_data_offset.total_system_time_ms; + let cpu_total_time = cpu_user_time + cpu_system_time; + + let metric = Metric::new( + constants::CPU_USER_TIME_METRIC.into(), + MetricValue::distribution(cpu_user_time), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_user_time metric: {}", e); + } + + let metric = Metric::new( + constants::CPU_SYSTEM_TIME_METRIC.into(), + MetricValue::distribution(cpu_system_time), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_system_time metric: {}", e); + } + + let metric = Metric::new( + constants::CPU_TOTAL_TIME_METRIC.into(), + MetricValue::distribution(cpu_total_time), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_total_time metric: {}", e); + } + } + + pub fn set_cpu_time_enhanced_metrics(&self, cpu_offset: Option) { + if !self.config.enhanced_metrics { + return; + } + + let mut aggr: std::sync::MutexGuard = + self.aggregator.lock().expect("lock poisoned"); + + let cpu_data = proc::get_cpu_data(); + match (cpu_offset, cpu_data) { + (Some(cpu_offset), Ok(cpu_data)) => { + Self::generate_cpu_time_enhanced_metrics(&cpu_offset, &cpu_data, &mut aggr); + } + (_, _) => { + debug!("Could not find data to generate cpu time enhanced metrics"); + } + } + } + + pub(crate) fn generate_cpu_utilization_enhanced_metrics( + cpu_data_offset: &CPUData, + 
cpu_data_end: &CPUData, + uptime_data_offset: f64, + uptime_data_end: f64, + aggr: &mut std::sync::MutexGuard, + ) { + let num_cores = cpu_data_end.individual_cpu_idle_times.len() as f64; + let uptime = uptime_data_end - uptime_data_offset; + let total_idle_time = cpu_data_end.total_idle_time_ms - cpu_data_offset.total_idle_time_ms; + + let mut max_idle_time = 0.0; + let mut min_idle_time = f64::MAX; + + for (cpu_name, cpu_idle_time) in &cpu_data_end.individual_cpu_idle_times { + if let Some(cpu_idle_time_offset) = + cpu_data_offset.individual_cpu_idle_times.get(cpu_name) + { + let idle_time = cpu_idle_time - cpu_idle_time_offset; + if idle_time < min_idle_time { + min_idle_time = idle_time; + } + if idle_time > max_idle_time { + max_idle_time = idle_time; + } + } + } + + // Maximally utilized CPU is the one with the least time spent in the idle process + // Multiply by 100 to report as percentage + let cpu_max_utilization = ((uptime - min_idle_time) / uptime) * 100.0; + + // Minimally utilized CPU is the one with the most time spent in the idle process + // Multiply by 100 to report as percentage + let cpu_min_utilization = ((uptime - max_idle_time) / uptime) * 100.0; + + // CPU total utilization is the proportion of total non-idle time to the total uptime across all cores + let cpu_total_utilization_decimal = + ((uptime * num_cores) - total_idle_time) / (uptime * num_cores); + // Multiply by 100 to report as percentage + let cpu_total_utilization_pct = cpu_total_utilization_decimal * 100.0; + // Multiply by num_cores to report in terms of cores + let cpu_total_utilization = cpu_total_utilization_decimal * num_cores; + + let metric = Metric::new( + constants::CPU_TOTAL_UTILIZATION_PCT_METRIC.into(), + MetricValue::distribution(cpu_total_utilization_pct), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_total_utilization_pct metric: {}", e); + } + + let metric = Metric::new( + constants::CPU_TOTAL_UTILIZATION_METRIC.into(), + 
MetricValue::distribution(cpu_total_utilization), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_total_utilization metric: {}", e); + } + + let metric = Metric::new( + constants::NUM_CORES_METRIC.into(), + MetricValue::distribution(num_cores), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert num_cores metric: {}", e); + } + + let metric = Metric::new( + constants::CPU_MAX_UTILIZATION_METRIC.into(), + MetricValue::distribution(cpu_max_utilization), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_max_utilization metric: {}", e); + } + + let metric = Metric::new( + constants::CPU_MIN_UTILIZATION_METRIC.into(), + MetricValue::distribution(cpu_min_utilization), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_min_utilization metric: {}", e); + } + } + + pub fn set_cpu_utilization_enhanced_metrics( + &self, + cpu_offset: Option, + uptime_offset: Option, + ) { + if !self.config.enhanced_metrics { + return; + } + + let mut aggr: std::sync::MutexGuard = + self.aggregator.lock().expect("lock poisoned"); + + let cpu_data = proc::get_cpu_data(); + let uptime_data = proc::get_uptime(); + match (cpu_offset, cpu_data, uptime_offset, uptime_data) { + (Some(cpu_offset), Ok(cpu_data), Some(uptime_offset), Ok(uptime_data)) => { + Self::generate_cpu_utilization_enhanced_metrics( + &cpu_offset, + &cpu_data, + uptime_offset, + uptime_data, + &mut aggr, + ); + } + (_, _, _, _) => { + debug!("Could not find data to generate cpu utilization enhanced metrics"); + } + } + } + fn calculate_estimated_cost_usd(billed_duration_ms: u64, memory_size_mb: u64) -> f64 { let gb_seconds = (billed_duration_ms as f64 * constants::MS_TO_SEC) * (memory_size_mb as f64 / constants::MB_TO_GB); @@ -234,9 +411,18 @@ impl Lambda { } } +#[derive(Clone, Debug, PartialEq)] +pub struct EnhancedMetricData { + pub network_offset: Option, + pub cpu_offset: Option, + pub 
uptime_offset: Option, +} + #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { + use std::collections::HashMap; + use super::*; use crate::config; use dogstatsd::metric::EMPTY_TAGS; @@ -346,6 +532,39 @@ mod tests { assert!(aggr .get_entry_by_id(constants::ESTIMATED_COST_METRIC.into(), &None) .is_none()); + assert!(aggr + .get_entry_by_id(constants::RX_BYTES_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::TX_BYTES_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::TOTAL_NETWORK_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_USER_TIME_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_SYSTEM_TIME_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_TOTAL_TIME_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_TOTAL_UTILIZATION_PCT_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_TOTAL_UTILIZATION_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::NUM_CORES_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_MIN_UTILIZATION_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_MAX_UTILIZATION_METRIC.into(), &None) + .is_none()); } #[test] @@ -393,4 +612,87 @@ mod tests { assert_sketch(&metrics_aggr, constants::TX_BYTES_METRIC, 74746.0); assert_sketch(&metrics_aggr, constants::TOTAL_NETWORK_METRIC, 94746.0); } + + #[test] + fn test_set_cpu_time_enhanced_metrics() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let mut individual_cpu_idle_time_offsets = HashMap::new(); + individual_cpu_idle_time_offsets.insert("cpu0".to_string(), 10.0); + individual_cpu_idle_time_offsets.insert("cpu1".to_string(), 20.0); + let cpu_offset = CPUData { + total_user_time_ms: 100.0, + 
total_system_time_ms: 3.0, + total_idle_time_ms: 20.0, + individual_cpu_idle_times: individual_cpu_idle_time_offsets, + }; + + let mut individual_cpu_idle_times_end = HashMap::new(); + individual_cpu_idle_times_end.insert("cpu0".to_string(), 30.0); + individual_cpu_idle_times_end.insert("cpu1".to_string(), 80.0); + let cpu_data = CPUData { + total_user_time_ms: 200.0, + total_system_time_ms: 56.0, + total_idle_time_ms: 100.0, + individual_cpu_idle_times: individual_cpu_idle_times_end, + }; + + Lambda::generate_cpu_time_enhanced_metrics( + &cpu_offset, + &cpu_data, + &mut lambda.aggregator.lock().expect("lock poisoned"), + ); + + assert_sketch(&metrics_aggr, constants::CPU_USER_TIME_METRIC, 100.0); + assert_sketch(&metrics_aggr, constants::CPU_SYSTEM_TIME_METRIC, 53.0); + assert_sketch(&metrics_aggr, constants::CPU_TOTAL_TIME_METRIC, 153.0); + } + + #[test] + fn test_set_cpu_utilization_enhanced_metrics() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let mut individual_cpu_idle_time_offsets = HashMap::new(); + individual_cpu_idle_time_offsets.insert("cpu0".to_string(), 10.0); + individual_cpu_idle_time_offsets.insert("cpu1".to_string(), 30.0); + let cpu_offset = CPUData { + total_user_time_ms: 50.0, + total_system_time_ms: 10.0, + total_idle_time_ms: 10.0, + individual_cpu_idle_times: individual_cpu_idle_time_offsets, + }; + let uptime_offset = 1891100.0; + + let mut individual_cpu_idle_times_end = HashMap::new(); + individual_cpu_idle_times_end.insert("cpu0".to_string(), 570.0); + individual_cpu_idle_times_end.insert("cpu1".to_string(), 600.0); + let cpu_data = CPUData { + total_user_time_ms: 200.0, + total_system_time_ms: 170.0, + total_idle_time_ms: 1130.0, + individual_cpu_idle_times: individual_cpu_idle_times_end, + }; + let uptime_data = 1891900.0; + + Lambda::generate_cpu_utilization_enhanced_metrics( + &cpu_offset, + &cpu_data, + uptime_offset, + uptime_data, + &mut 
lambda.aggregator.lock().expect("lock poisoned"), + ); + + // the differences above and metric values below are from an invocation using the go agent to verify the calculations + assert_sketch( + &metrics_aggr, + constants::CPU_TOTAL_UTILIZATION_PCT_METRIC, + 30.0, + ); + assert_sketch(&metrics_aggr, constants::CPU_TOTAL_UTILIZATION_METRIC, 0.6); + assert_sketch(&metrics_aggr, constants::NUM_CORES_METRIC, 2.0); + assert_sketch(&metrics_aggr, constants::CPU_MAX_UTILIZATION_METRIC, 30.0); + assert_sketch(&metrics_aggr, constants::CPU_MIN_UTILIZATION_METRIC, 28.75); + } } diff --git a/bottlecap/src/proc/clock.rs b/bottlecap/src/proc/clock.rs new file mode 100644 index 000000000..8c7c9b328 --- /dev/null +++ b/bottlecap/src/proc/clock.rs @@ -0,0 +1,20 @@ +use nix::unistd::{sysconf, SysconfVar}; +use std::io; + +#[allow(clippy::cast_sign_loss)] +#[cfg(not(target_os = "windows"))] +pub fn get_clk_tck() -> Result { + match sysconf(SysconfVar::CLK_TCK) { + Ok(Some(clk_tck)) if clk_tck > 0 => Ok(clk_tck as u64), + _ => Err(io::Error::new( + io::ErrorKind::NotFound, + "Could not find system clock ticks per second", + )), + } +} + +#[cfg(target_os = "windows")] +pub fn get_clk_tck() -> Result { + // Windows does not have this concept + Ok(1) +} diff --git a/bottlecap/src/proc/constants.rs b/bottlecap/src/proc/constants.rs index be7986e53..fe06b908d 100644 --- a/bottlecap/src/proc/constants.rs +++ b/bottlecap/src/proc/constants.rs @@ -1,3 +1,5 @@ pub const PROC_NET_DEV_PATH: &str = "/proc/net/dev"; +pub const PROC_STAT_PATH: &str = "/proc/stat"; +pub const PROC_UPTIME_PATH: &str = "/proc/uptime"; pub const LAMDBA_NETWORK_INTERFACE: &str = "vinternal_1"; diff --git a/bottlecap/src/proc/mod.rs b/bottlecap/src/proc/mod.rs index c33a2984e..1a74a2b9d 100644 --- a/bottlecap/src/proc/mod.rs +++ b/bottlecap/src/proc/mod.rs @@ -1,11 +1,13 @@ +pub mod clock; pub mod constants; use std::{ + collections::HashMap, fs::File, io::{self, BufRead}, }; -use constants::{LAMDBA_NETWORK_INTERFACE, 
PROC_NET_DEV_PATH}; +use constants::{LAMDBA_NETWORK_INTERFACE, PROC_NET_DEV_PATH, PROC_STAT_PATH, PROC_UPTIME_PATH}; #[derive(Copy, Clone, Debug, PartialEq)] pub struct NetworkData { @@ -58,6 +60,126 @@ fn get_network_data_from_path(path: &str) -> Result { )) } +#[derive(Clone, Debug, PartialEq)] +pub struct CPUData { + pub total_user_time_ms: f64, + pub total_system_time_ms: f64, + pub total_idle_time_ms: f64, + pub individual_cpu_idle_times: HashMap, +} + +pub fn get_cpu_data() -> Result { + get_cpu_data_from_path(PROC_STAT_PATH) +} + +fn get_cpu_data_from_path(path: &str) -> Result { + let file = File::open(path)?; + let reader = io::BufReader::new(file); + + let mut cpu_data = CPUData { + total_user_time_ms: 0.0, + total_system_time_ms: 0.0, + total_idle_time_ms: 0.0, + individual_cpu_idle_times: HashMap::new(), + }; + + // SC_CLK_TCK is the system clock frequency in ticks per second + // We'll use this to convert CPU times from user HZ to milliseconds + let clktck = clock::get_clk_tck()? 
as f64; + + for line in reader.lines() { + let line = line?; + let mut values = line.split_whitespace(); + + if let Some(label) = values.next() { + if label == "cpu" { + // Parse CPU times for total user, system, and idle + let user: Option = values.next().and_then(|s| s.parse().ok()); + values.next(); // skip "nice" + let system: Option = values.next().and_then(|s| s.parse().ok()); + let idle: Option = values.next().and_then(|s| s.parse().ok()); + + match (user, system, idle) { + (Some(user_val), Some(system_val), Some(idle_val)) => { + // Divide values by clock tick to covert to seconds, then multiply by 1000 to convert to ms + cpu_data.total_user_time_ms = (user_val / clktck) * 1000.0; + cpu_data.total_system_time_ms = (system_val / clktck) * 1000.0; + cpu_data.total_idle_time_ms = (idle_val / clktck) * 1000.0; + } + (_, _, _) => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Failed to parse CPU data", + )) + } + } + } else if label.starts_with("cpu") { + // Parse per core (i.e. "cpu0", "cpu1", etc.) 
idle times + // Skip the first three values (user, nice, system) and get the 4th value (idle) + let idle: Option = values.nth(3).and_then(|s| s.parse().ok()); + + match idle { + Some(idle_val) => { + // Divide value by clock tick to covert to seconds, then multiply by 1000 to convert to ms + cpu_data + .individual_cpu_idle_times + .insert(label.to_string(), (idle_val / clktck) * 1000.0); + } + None => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Failed to parse per-core CPU data", + )) + } + } + } + } + } + + if cpu_data.individual_cpu_idle_times.is_empty() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "Per-core CPU data not found", + )); + } + + Ok(cpu_data) +} + +pub fn get_uptime() -> Result { + get_uptime_from_path(PROC_UPTIME_PATH) +} + +fn get_uptime_from_path(path: &str) -> Result { + let file = File::open(path)?; + let reader = io::BufReader::new(file); + + if let Some(line) = reader.lines().next() { + let line = line?; + let mut values = line.split_whitespace(); + + let uptime: Option = values.next().and_then(|s| s.parse().ok()); + let idle: Option = values.next().and_then(|s| s.parse().ok()); + + match (uptime, idle) { + // Check that the file is correctly formatted (i.e. 
has both values) + // Multiply val by 1000 to convert seconds to milliseconds + (Some(uptime_val), Some(_idle_val)) => return Ok(uptime_val * 1000.0), + (_, _) => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Failed to parse uptime data", + )); + } + } + } + + Err(io::Error::new( + io::ErrorKind::NotFound, + "Uptime data not found", + )) +} + #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { @@ -89,4 +211,77 @@ mod tests { let network_data_result = get_network_data_from_path(&path); assert!(network_data_result.is_err()); } + + #[test] + #[allow(clippy::float_cmp)] + fn test_get_cpu_data() { + let path = "./tests/proc/stat/valid_stat"; + let cpu_data_result = get_cpu_data_from_path(&path); + assert!(!cpu_data_result.is_err()); + let cpu_data = cpu_data_result.unwrap(); + assert_eq!(cpu_data.total_user_time_ms, 23370.0); + assert_eq!(cpu_data.total_system_time_ms, 1880.0); + assert_eq!(cpu_data.total_idle_time_ms, 178380.0); + assert_eq!(cpu_data.individual_cpu_idle_times.len(), 2); + assert_eq!( + *cpu_data + .individual_cpu_idle_times + .get("cpu0") + .expect("cpu0 not found"), + 91880.0 + ); + assert_eq!( + *cpu_data + .individual_cpu_idle_times + .get("cpu1") + .expect("cpu1 not found"), + 86490.0 + ); + + let path = "./tests/proc/stat/invalid_stat_non_numerical_value_1"; + let cpu_data_result = get_cpu_data_from_path(&path); + assert!(cpu_data_result.is_err()); + + let path = "./tests/proc/stat/invalid_stat_non_numerical_value_2"; + let cpu_data_result = get_cpu_data_from_path(&path); + assert!(cpu_data_result.is_err()); + + let path = "./tests/proc/stat/invalid_stat_malformed_first_line"; + let cpu_data_result = get_cpu_data_from_path(&path); + assert!(cpu_data_result.is_err()); + + let path = "./tests/proc/stat/invalid_stat_malformed_per_cpu_line"; + let cpu_data_result = get_cpu_data_from_path(&path); + assert!(cpu_data_result.is_err()); + + let path = "./tests/proc/stat/invalid_stat_missing_cpun_data"; + let cpu_data_result = 
get_cpu_data_from_path(&path); + assert!(cpu_data_result.is_err()); + + let path = "./tests/proc/stat/nonexistent_stat"; + let cpu_data_result = get_cpu_data_from_path(&path); + assert!(cpu_data_result.is_err()); + } + + #[test] + #[allow(clippy::float_cmp)] + fn test_get_uptime_data() { + let path = "./tests/proc/uptime/valid_uptime"; + let uptime_data_result = get_uptime_from_path(&path); + assert!(!uptime_data_result.is_err()); + let uptime_data = uptime_data_result.unwrap(); + assert_eq!(uptime_data, 3213103123000.0); + + let path = "./tests/proc/uptime/invalid_data_uptime"; + let uptime_data_result = get_uptime_from_path(&path); + assert!(uptime_data_result.is_err()); + + let path = "./tests/proc/uptime/malformed_uptime"; + let uptime_data_result = get_uptime_from_path(&path); + assert!(uptime_data_result.is_err()); + + let path = "./tests/proc/uptime/nonexistent_uptime"; + let uptime_data_result = get_uptime_from_path(&path); + assert!(uptime_data_result.is_err()); + } } diff --git a/bottlecap/tests/proc/stat/invalid_stat_malformed_first_line b/bottlecap/tests/proc/stat/invalid_stat_malformed_first_line new file mode 100644 index 000000000..7071a126d --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_malformed_first_line @@ -0,0 +1,2 @@ +cpu 2337 +... diff --git a/bottlecap/tests/proc/stat/invalid_stat_malformed_per_cpu_line b/bottlecap/tests/proc/stat/invalid_stat_malformed_per_cpu_line new file mode 100644 index 000000000..d4dd4badd --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_malformed_per_cpu_line @@ -0,0 +1,10 @@ +cpu 2337 0 188 17838 8 0 16 181 0 0 +cpu0 1453 0 87 8649 2 0 10 85 0 0 +cpu1 884 0 ... 
+intr 67620 0 0 0 0 354 4356 233 1294 89 759 185 359 8 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 135197 +btime 1716225108 +processes 1428 +procs_running 1 +procs_blocked 0 +softirq 27242 0 2425 2 696 6166 0 48 7838 0 10067 diff --git a/bottlecap/tests/proc/stat/invalid_stat_missing_cpun_data b/bottlecap/tests/proc/stat/invalid_stat_missing_cpun_data new file mode 100644 index 000000000..75119c03d --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_missing_cpun_data @@ -0,0 +1,8 @@ +cpu 2337 0 188 17838 8 0 16 181 0 0 +intr 67620 0 0 0 0 354 4356 233 1294 89 759 185 359 8 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 135197 +btime 1716225108 +processes 1428 +procs_running 1 +procs_blocked 0 +softirq 27242 0 2425 2 696 6166 0 48 7838 0 10067 diff --git a/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_1 b/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_1 new file mode 100644 index 000000000..d72287175 --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_1 @@ -0,0 +1,2 @@ +cpu 2337 0 INVALID 17838 8 0 16 181 0 0 +... diff --git a/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_2 b/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_2 new file mode 100644 index 000000000..816ba9009 --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_2 @@ -0,0 +1,2 @@ +cpu INVALID 0 188 17838 8 0 16 181 0 0 +... 
diff --git a/bottlecap/tests/proc/stat/valid_stat b/bottlecap/tests/proc/stat/valid_stat new file mode 100644 index 000000000..d0a082700 --- /dev/null +++ b/bottlecap/tests/proc/stat/valid_stat @@ -0,0 +1,10 @@ +cpu 2337 0 188 17838 8 0 16 181 0 0 +cpu0 884 0 100 9188 5 0 6 95 0 0 +cpu1 1453 0 87 8649 2 0 10 85 0 0 +intr 67620 0 0 0 0 354 4356 233 1294 89 759 185 359 8 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 135197 +btime 1716225108 +processes 1428 +procs_running 1 +procs_blocked 0 +softirq 27242 0 2425 2 696 6166 0 48 7838 0 10067 diff --git a/bottlecap/tests/proc/uptime/invalid_data_uptime b/bottlecap/tests/proc/uptime/invalid_data_uptime new file mode 100644 index 000000000..7fc664612 --- /dev/null +++ b/bottlecap/tests/proc/uptime/invalid_data_uptime @@ -0,0 +1 @@ +3213103123 INVALID diff --git a/bottlecap/tests/proc/uptime/malformed_uptime b/bottlecap/tests/proc/uptime/malformed_uptime new file mode 100644 index 000000000..e75900cd7 --- /dev/null +++ b/bottlecap/tests/proc/uptime/malformed_uptime @@ -0,0 +1 @@ +3213103123 diff --git a/bottlecap/tests/proc/uptime/valid_uptime b/bottlecap/tests/proc/uptime/valid_uptime new file mode 100644 index 000000000..91c626c1b --- /dev/null +++ b/bottlecap/tests/proc/uptime/valid_uptime @@ -0,0 +1 @@ +3213103123 32131 From 70455c905311cacf43f105a80166ee9ac5416373 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:51:29 -0500 Subject: [PATCH 16/41] feat(bottlecap): add SQS inferred span (#434) * wip: sqs * feat: sqs tests * invert duration check * remove duration set * fmt and add `test_get_arn` * remove unneeded reference * remove unneeded comments * add `get_carrier` implementation for `SqsRecord` * add trace context to `sqs_event.json` * fix: resource_names is not needed * fix: don't deserialize body * avoid `use super::...` * fix unit tests * set carrier and trigger tags * remove 
duplicate tag * fmt * pass headers to `on_invocation_end` * infer first, then extract or else theres nothing to extract, reset values also for next inferr, no need to keep state after we complete * reset values on every infer * move some constants * add missing trigger tags * missed one case --------- Co-authored-by: AJ Stuyvenberg --- .../src/lifecycle/invocation/processor.rs | 44 ++- .../src/lifecycle/invocation/span_inferrer.rs | 67 ++-- .../triggers/api_gateway_http_event.rs | 21 +- .../triggers/api_gateway_rest_event.rs | 19 +- .../src/lifecycle/invocation/triggers/mod.rs | 5 + .../invocation/triggers/sqs_event.rs | 329 ++++++++++++++++++ bottlecap/src/lifecycle/listener.rs | 27 +- bottlecap/tests/payloads/sqs_event.json | 27 ++ 8 files changed, 468 insertions(+), 71 deletions(-) create mode 100644 bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs create mode 100644 bottlecap/tests/payloads/sqs_event.json diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index e7d2925fc..d6679bc2f 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -221,14 +221,15 @@ impl Processor { self.span.trace_id = 0; self.span.parent_id = 0; self.span.span_id = 0; + self.extracted_span_context = None; let payload_value = match serde_json::from_slice::(&payload) { Ok(value) => value, Err(_) => json!({}), }; - self.extracted_span_context = self.extract_span_context(&headers, &payload_value); self.inferrer.infer_span(&payload_value, &self.aws_config); + self.extracted_span_context = self.extract_span_context(&headers, &payload_value); if let Some(sc) = &self.extracted_span_context { self.span.trace_id = sc.trace_id; @@ -280,19 +281,46 @@ impl Processor { /// pub fn on_invocation_end( &mut self, - trace_id: u64, - span_id: u64, - parent_id: u64, + headers: HashMap, status_code: Option, ) { - self.span.trace_id = trace_id; - self.span.span_id = span_id; - + 
self.update_span_context(headers); if self.inferrer.inferred_span.is_some() { if let Some(status_code) = status_code { self.inferrer.set_status_code(status_code); } - } else { + } + } + + fn update_span_context(&mut self, headers: HashMap) { + // todo: fix this, code is a copy of the existing logic in Go, not accounting + // when a 128 bit trace id exist + let mut trace_id = 0; + let mut span_id = 0; + let mut parent_id = 0; + + // If we have a trace context, update the span context + if let Some(sc) = &mut self.extracted_span_context { + trace_id = sc.trace_id; + span_id = sc.span_id; + } + + if let Some(header) = headers.get("x-datadog-trace-id") { + trace_id = header.parse::().unwrap_or(0); + } + + if let Some(header) = headers.get("x-datadog-span-id") { + span_id = header.parse::().unwrap_or(0); + } + + if let Some(header) = headers.get("x-datadog-parent-id") { + parent_id = header.parse::().unwrap_or(0); + } + + self.span.trace_id = trace_id; + self.span.span_id = span_id; + + if self.inferrer.inferred_span.is_none() { self.span.parent_id = parent_id; } } diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index b63f2a82d..4ad90dffa 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -9,13 +9,10 @@ use crate::config::AwsConfig; use crate::lifecycle::invocation::triggers::{ api_gateway_http_event::APIGatewayHttpEvent, api_gateway_rest_event::APIGatewayRestEvent, - Trigger, + sqs_event::SqsRecord, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, }; use crate::tags::lambda::tags::{INIT_TYPE, SNAP_START_VALUE}; -const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; -const FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG: &str = "function_trigger.event_source_arn"; - pub struct SpanInferrer { pub inferred_span: Option, is_async_span: bool, @@ -46,6 +43,10 @@ impl SpanInferrer { /// pub fn infer_span(&mut self, 
payload_value: &Value, aws_config: &AwsConfig) { self.inferred_span = None; + self.is_async_span = false; + self.carrier = None; + self.trigger_tags = None; + if APIGatewayHttpEvent::is_match(payload_value) { if let Some(t) = APIGatewayHttpEvent::new(payload_value.clone()) { let mut span = Span { @@ -54,19 +55,14 @@ impl SpanInferrer { }; t.enrich_span(&mut span); - span.meta.extend([ - ( - FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), - "api_gateway".to_string(), - ), - ( - FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), - t.get_arn(&aws_config.region), - ), - ]); + let mut tt = t.get_tags(); + tt.extend([( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + t.get_arn(&aws_config.region), + )]); self.carrier = Some(t.get_carrier()); - self.trigger_tags = Some(t.get_tags()); + self.trigger_tags = Some(tt); self.is_async_span = t.is_async(); self.inferred_span = Some(span); } @@ -78,24 +74,38 @@ impl SpanInferrer { }; t.enrich_span(&mut span); - span.meta.extend([ - ( - FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), - "api_gateway".to_string(), - ), - ( - FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), - t.get_arn(&aws_config.region), - ), - ]); + let mut tt = t.get_tags(); + tt.extend([( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + t.get_arn(&aws_config.region), + )]); + + self.carrier = Some(t.get_carrier()); + self.trigger_tags = Some(tt); + self.is_async_span = t.is_async(); + self.inferred_span = Some(span); + } + } else if SqsRecord::is_match(payload_value) { + if let Some(t) = SqsRecord::new(payload_value.clone()) { + let mut span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + + t.enrich_span(&mut span); + let mut tt = t.get_tags(); + tt.extend([( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + t.get_arn(&aws_config.region), + )]); self.carrier = Some(t.get_carrier()); - self.trigger_tags = Some(t.get_tags()); + self.trigger_tags = Some(tt); self.is_async_span = t.is_async(); self.inferred_span = 
Some(span); } } else { - debug!("Unable to infer span from payload"); + debug!("Unable to infer span from payload: no matching trigger found"); } } @@ -124,7 +134,8 @@ impl SpanInferrer { pub fn complete_inferred_span(&mut self, invocation_span: &Span) { if let Some(s) = &mut self.inferred_span { if self.is_async_span { - if s.duration != 0 { + // SNS to SQS span duration will be set + if s.duration == 0 { let duration = invocation_span.start - s.start; s.duration = duration; } diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs index 932541a00..e07d86692 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs @@ -6,7 +6,9 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::MS_TO_NS, - triggers::{get_aws_partition_by_region, lowercase_key, Trigger}, + triggers::{ + get_aws_partition_by_region, lowercase_key, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, }; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -114,10 +116,7 @@ impl Trigger for APIGatewayHttpEvent { "request_id".to_string(), self.request_context.request_id.clone(), ), - ("resource_names".to_string(), resource), ])); - - // todo: update global(? 
IsAsync if event payload is `Event` } fn get_tags(&self) -> HashMap { @@ -140,6 +139,10 @@ impl Trigger for APIGatewayHttpEvent { "http.method".to_string(), self.request_context.http.method.clone(), ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "api-gateway".to_string(), + ), ]); // route is parameterized // /users/{id}/profile @@ -287,7 +290,6 @@ mod tests { ("http.user_agent".to_string(), "curl/7.64.1".to_string()), ("operation_name".to_string(), "aws.httpapi".to_string()), ("request_id".to_string(), "FaHnXjKCGjQEJ7A=".to_string()), - ("resource_names".to_string(), "GET /httpapi/get".to_string()), ]) ); } @@ -311,6 +313,10 @@ mod tests { ("http.method".to_string(), "GET".to_string()), ("http.route".to_string(), "/httpapi/get".to_string()), ("http.user_agent".to_string(), "curl/7.64.1".to_string()), + ( + "function_trigger.event_source".to_string(), + "api-gateway".to_string(), + ), ]); assert_eq!(tags, expected); @@ -345,7 +351,6 @@ mod tests { ("http.user_agent".to_string(), "curl/8.1.2".to_string()), ("operation_name".to_string(), "aws.httpapi".to_string()), ("request_id".to_string(), "Ur2JtjEfGjQEPOg=".to_string()), - ("resource_names".to_string(), "GET /user/{id}".to_string()), ]) ); } @@ -367,6 +372,10 @@ mod tests { ("http.method".to_string(), "GET".to_string()), ("http.route".to_string(), "/user/{id}".to_string()), ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ( + "function_trigger.event_source".to_string(), + "api-gateway".to_string(), + ), ]); assert_eq!(tags, expected); } diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs index d2588f37a..e8fc443dd 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs @@ -6,7 +6,9 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::MS_TO_NS, - 
triggers::{get_aws_partition_by_region, lowercase_key, Trigger}, + triggers::{ + get_aws_partition_by_region, lowercase_key, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, }; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -109,7 +111,6 @@ impl Trigger for APIGatewayRestEvent { "request_id".to_string(), self.request_context.request_id.clone(), ), - ("resource_names".to_string(), resource.clone()), ( "http.route".to_string(), self.request_context.resource_path.clone(), @@ -143,6 +144,10 @@ impl Trigger for APIGatewayRestEvent { "http.user_agent".to_string(), self.request_context.identity.user_agent.to_string(), ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "api-gateway".to_string(), + ), ]); if let Some(referer) = self.headers.get("referer") { @@ -256,7 +261,6 @@ mod tests { ("http.route".to_string(), "/path".to_string()), ("operation_name".to_string(), "aws.apigateway".to_string()), ("request_id".to_string(), "id=".to_string()), - ("resource_names".to_string(), "GET /path".to_string()), ]) ); } @@ -278,6 +282,10 @@ mod tests { ("http.method".to_string(), "GET".to_string()), ("http.route".to_string(), "/path".to_string()), ("http.user_agent".to_string(), "user-agent".to_string()), + ( + "function_trigger.event_source".to_string(), + "api-gateway".to_string(), + ), ]); assert_eq!(tags, expected); @@ -314,7 +322,6 @@ mod tests { "request_id".to_string(), "e16399f7-e984-463a-9931-745ba021a27f".to_string(), ), - ("resource_names".to_string(), "GET /user/{id}".to_string()), ]); assert_eq!(span.meta, expected); } @@ -342,6 +349,10 @@ mod tests { ("http.method".to_string(), "GET".to_string()), ("http.route".to_string(), "/user/{id}".to_string()), ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ( + "function_trigger.event_source".to_string(), + "api-gateway".to_string() + ), ]) ); } diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index f04db8a81..a989ce009 100644 
--- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -6,6 +6,11 @@ use serde_json::Value; pub mod api_gateway_http_event; pub mod api_gateway_rest_event; +pub mod sqs_event; + +pub const DATADOG_CARRIER_KEY: &str = "_datadog"; +pub const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; +pub const FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG: &str = "function_trigger.event_source_arn"; pub trait Trigger: Sized { fn new(payload: Value) -> Option; diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs new file mode 100644 index 000000000..0daed4385 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -0,0 +1,329 @@ +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{ + get_aws_partition_by_region, Trigger, DATADOG_CARRIER_KEY, + FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct SqsEvent { + #[serde(rename = "Records")] + pub records: Vec, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct SqsRecord { + #[serde(rename = "messageId")] + pub message_id: String, + #[serde(rename = "receiptHandle")] + pub receipt_handle: String, + pub attributes: Attributes, + #[serde(rename = "messageAttributes")] + pub message_attributes: HashMap, + #[serde(rename = "md5OfBody")] + pub md5_of_body: String, + #[serde(rename = "eventSource")] + pub event_source: String, + #[serde(rename = "eventSourceARN")] + pub event_source_arn: String, + #[serde(rename = "awsRegion")] + pub aws_region: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MessageAttribute { + #[serde(rename = "stringValue")] + pub 
string_value: Option, + #[serde(rename = "binaryValue")] + pub binary_value: Option, + #[serde(rename = "stringListValues")] + pub string_list_values: Option>, + #[serde(rename = "binaryListValues")] + pub binary_list_values: Option>, + #[serde(rename = "dataType")] + pub data_type: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Attributes { + #[serde(rename = "ApproximateFirstReceiveTimestamp")] + pub approximate_first_receive_timestamp: String, + #[serde(rename = "ApproximateReceiveCount")] + pub approximate_receive_count: String, + #[serde(rename = "SentTimestamp")] + pub sent_timestamp: String, + #[serde(rename = "SenderId")] + pub sender_id: String, +} + +impl Trigger for SqsRecord { + fn new(payload: Value) -> Option { + let records = payload.get("Records").and_then(Value::as_array); + match records { + Some(records) => match serde_json::from_value::(records[0].clone()) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize SQS Record: {e}"); + None + } + }, + None => None, + } + } + + fn is_match(payload: &Value) -> bool { + if let Some(first_record) = payload + .get("Records") + .and_then(Value::as_array) + .and_then(|r| r.first()) + .take() + { + first_record + .get("eventSource") + .and_then(Value::as_str) + .map_or(false, |s| s == "aws:sqs") + } else { + false + } + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span) { + debug!("Enriching an Inferred Span for an SQS Event"); + let resource = self + .event_source_arn + .clone() + .split(':') + .last() + .unwrap_or_default() + .to_string(); + let start_time = (self + .attributes + .sent_timestamp + .parse::() + .unwrap_or_default() as f64 + * MS_TO_NS) as i64; + // todo: service mapping + let service_name = "sqs"; + + span.name = "aws.sqs".to_string(); + span.service = service_name.to_string(); + span.resource.clone_from(&resource); + span.r#type = "web".to_string(); + span.start = start_time; + 
span.meta.extend(HashMap::from([ + ("operation_name".to_string(), "aws.sqs".to_string()), + ("receipt_handle".to_string(), self.receipt_handle.clone()), + ( + "retry_count".to_string(), + self.attributes.approximate_receive_count.clone(), + ), + ("sender_id".to_string(), self.attributes.sender_id.clone()), + ("source_arn".to_string(), self.event_source_arn.clone()), + ("aws_region".to_string(), self.aws_region.clone()), + ])); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([ + ( + "retry_count".to_string(), + self.attributes.approximate_receive_count.clone(), + ), + ("sender_id".to_string(), self.attributes.sender_id.clone()), + ("source_arn".to_string(), self.event_source_arn.clone()), + ("aws_region".to_string(), self.aws_region.clone()), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "sqs".to_string(), + ), + ]) + } + + fn get_arn(&self, region: &str) -> String { + if let [_, _, _, _, account, queue_name] = self + .event_source_arn + .split(':') + .collect::>() + .as_slice() + { + format!( + "arn:{}:sqs:{}:{}:{}", + get_aws_partition_by_region(region), + region, + account, + queue_name + ) + } else { + String::new() + } + } + + fn is_async(&self) -> bool { + true + } + + fn get_carrier(&self) -> HashMap { + let carrier = HashMap::new(); + if let Some(ma) = self.message_attributes.get(DATADOG_CARRIER_KEY) { + if let Some(string_value) = &ma.string_value { + return serde_json::from_str(string_value).unwrap_or_default(); + } + } + + // TODO: AWSTraceHeader + + carrier + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = SqsRecord::new(payload).expect("Failed to deserialize into Record"); + + let message_attributes = HashMap::::from([ + ("_datadog".to_string(), MessageAttribute { + string_value: 
Some("{\"x-datadog-trace-id\":\"2684756524522091840\",\"x-datadog-parent-id\":\"7431398482019833808\",\"x-datadog-sampling-priority\":\"1\"}".to_string()), + binary_value: None, + string_list_values: Some(vec![]), + binary_list_values: Some(vec![]), + data_type: "String".to_string(), + }) + ]); + + let expected = SqsRecord { + message_id: "19dd0b57-b21e-4ac1-bd88-01bbb068cb78".to_string(), + receipt_handle: "MessageReceiptHandle".to_string(), + attributes: Attributes { + approximate_first_receive_timestamp: "1523232000001".to_string(), + approximate_receive_count: "1".to_string(), + sent_timestamp: "1523232000000".to_string(), + sender_id: "123456789012".to_string(), + }, + message_attributes, + md5_of_body: "{{{md5_of_body}}}".to_string(), + event_source: "aws:sqs".to_string(), + event_source_arn: "arn:aws:sqs:us-east-1:123456789012:MyQueue".to_string(), + aws_region: "us-east-1".to_string(), + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + + assert!(SqsRecord::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!SqsRecord::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.sqs"); + assert_eq!(span.service, "sqs"); + assert_eq!(span.resource, "MyQueue"); + assert_eq!(span.r#type, "web"); + + assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), "aws.sqs".to_string()), + ( + 
"receipt_handle".to_string(), + "MessageReceiptHandle".to_string(), + ), + ("retry_count".to_string(), 1.to_string()), + ("sender_id".to_string(), "123456789012".to_string()), + ( + "source_arn".to_string(), + "arn:aws:sqs:us-east-1:123456789012:MyQueue".to_string() + ), + ("aws_region".to_string(), "us-east-1".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + let tags = event.get_tags(); + + let expected = HashMap::from([ + ("retry_count".to_string(), 1.to_string()), + ("sender_id".to_string(), "123456789012".to_string()), + ( + "source_arn".to_string(), + "arn:aws:sqs:us-east-1:123456789012:MyQueue".to_string(), + ), + ("aws_region".to_string(), "us-east-1".to_string()), + ( + "function_trigger.event_source".to_string(), + "sqs".to_string(), + ), + ]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:sqs:us-east-1:123456789012:MyQueue" + ); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "2684756524522091840".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "7431398482019833808".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } +} diff --git 
a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs index a4d39310b..b255ec491 100644 --- a/bottlecap/src/lifecycle/listener.rs +++ b/bottlecap/src/lifecycle/listener.rs @@ -148,34 +148,11 @@ impl Listener { if let Some(status_code) = parsed_body.unwrap_or_default().get("statusCode") { parsed_status = Some(status_code.to_string()); } - let headers = parts.headers; let mut processor = invocation_processor.lock().await; - // todo: fix this, code is a copy of the existing logic in Go, not accounting - // when a 128 bit trace id exist - let mut trace_id = 0; - if let Some(header) = headers.get("x-datadog-trace-id") { - if let Ok(header_value) = header.to_str() { - trace_id = header_value.parse::().unwrap_or(0); - } - } - - let mut span_id = 0; - if let Some(header) = headers.get("x-datadog-span-id") { - if let Ok(header_value) = header.to_str() { - span_id = header_value.parse::().unwrap_or(0); - } - } - - let mut parent_id = 0; - if let Some(header) = headers.get("x-datadog-parent-id") { - if let Ok(header_value) = header.to_str() { - parent_id = header_value.parse::().unwrap_or(0); - } - } - - processor.on_invocation_end(trace_id, span_id, parent_id, parsed_status); + let headers = Self::headers_to_map(parts.headers); + processor.on_invocation_end(headers, parsed_status); drop(processor); Response::builder() diff --git a/bottlecap/tests/payloads/sqs_event.json b/bottlecap/tests/payloads/sqs_event.json new file mode 100644 index 000000000..5cc7837fd --- /dev/null +++ b/bottlecap/tests/payloads/sqs_event.json @@ -0,0 +1,27 @@ +{ + "Records": [ + { + "messageId": "19dd0b57-b21e-4ac1-bd88-01bbb068cb78", + "receiptHandle": "MessageReceiptHandle", + "body": "Hello from SQS!", + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1523232000000", + "SenderId": "123456789012", + "ApproximateFirstReceiveTimestamp": "1523232000001" + }, + "messageAttributes": { + "_datadog": { + "stringValue": 
"{\"x-datadog-trace-id\":\"2684756524522091840\",\"x-datadog-parent-id\":\"7431398482019833808\",\"x-datadog-sampling-priority\":\"1\"}", + "stringListValues": [], + "binaryListValues": [], + "dataType": "String" + } + }, + "md5OfBody": "{{{md5_of_body}}}", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:MyQueue", + "awsRegion": "us-east-1" + } + ] +} From 1f61d5c5bcea98b77a9837a0dff88c4a4ea7298e Mon Sep 17 00:00:00 2001 From: alexgallotta <5581237+alexgallotta@users.noreply.github.com> Date: Fri, 8 Nov 2024 12:58:48 -0500 Subject: [PATCH 17/41] fix(bottlecap): filter http, tcp and local spans (#439) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * filter http, tcp and local spans * fix original condition * update comments * reuse constants and match sytle * Update bottlecap/src/traces/mod.rs * Update bottlecap/src/traces/mod.rs * Update bottlecap/src/traces/mod.rs * Update bottlecap/src/traces/mod.rs --------- Co-authored-by: jordan gonzález <30836115+duncanista@users.noreply.github.com> --- bottlecap/src/lifecycle/invocation/context.rs | 2 +- .../src/lifecycle/invocation/triggers/mod.rs | 3 +- bottlecap/src/metrics/enhanced/lambda.rs | 4 +- bottlecap/src/proc/mod.rs | 42 ++++++------ bottlecap/src/traces/mod.rs | 21 ++++++ bottlecap/src/traces/propagation/mod.rs | 66 +++++++++---------- .../traces/propagation/text_map_propagator.rs | 4 +- bottlecap/src/traces/trace_processor.rs | 62 +++++++++++++++-- 8 files changed, 139 insertions(+), 65 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs index 325fd3cd3..d90a798b9 100644 --- a/bottlecap/src/lifecycle/invocation/context.rs +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -314,7 +314,7 @@ mod tests { total_user_time_ms: 100.0, total_system_time_ms: 53.0, total_idle_time_ms: 20.0, - individual_cpu_idle_times: individual_cpu_idle_times, + individual_cpu_idle_times, 
}); let uptime_offset = Some(50.0); diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index a989ce009..f65a9155a 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -52,7 +52,8 @@ where pub mod test_utils { use std::fs; + #[must_use] pub fn read_json_file(file_name: &str) -> String { - fs::read_to_string(format!("tests/payloads/{}", file_name)).expect("Failed to read file") + fs::read_to_string(format!("tests/payloads/{file_name}")).expect("Failed to read file") } } diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index 13dd203a9..b11427e96 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -663,7 +663,7 @@ mod tests { total_idle_time_ms: 10.0, individual_cpu_idle_times: individual_cpu_idle_time_offsets, }; - let uptime_offset = 1891100.0; + let uptime_offset = 1_891_100.0; let mut individual_cpu_idle_times_end = HashMap::new(); individual_cpu_idle_times_end.insert("cpu0".to_string(), 570.0); @@ -674,7 +674,7 @@ mod tests { total_idle_time_ms: 1130.0, individual_cpu_idle_times: individual_cpu_idle_times_end, }; - let uptime_data = 1891900.0; + let uptime_data = 1_891_900.0; Lambda::generate_cpu_utilization_enhanced_metrics( &cpu_offset, diff --git a/bottlecap/src/proc/mod.rs b/bottlecap/src/proc/mod.rs index 1a74a2b9d..23d6680ec 100644 --- a/bottlecap/src/proc/mod.rs +++ b/bottlecap/src/proc/mod.rs @@ -189,26 +189,26 @@ mod tests { #[allow(clippy::float_cmp)] fn test_get_network_data() { let path = "./tests/proc/net/valid_dev"; - let network_data_result = get_network_data_from_path(&path); - assert!(!network_data_result.is_err()); + let network_data_result = get_network_data_from_path(path); + assert!(network_data_result.is_ok()); let network_data_result = network_data_result.unwrap(); assert_eq!(network_data_result.rx_bytes, 
180.0); assert_eq!(network_data_result.tx_bytes, 254.0); let path = "./tests/proc/net/invalid_dev_malformed"; - let network_data_result = get_network_data_from_path(&path); + let network_data_result = get_network_data_from_path(path); assert!(network_data_result.is_err()); let path = "./tests/proc/net/invalid_dev_non_numerical_value"; - let network_data_result = get_network_data_from_path(&path); + let network_data_result = get_network_data_from_path(path); assert!(network_data_result.is_err()); let path = "./tests/proc/net/missing_interface_dev"; - let network_data_result = get_network_data_from_path(&path); + let network_data_result = get_network_data_from_path(path); assert!(network_data_result.is_err()); let path = "./tests/proc/net/nonexistent_dev"; - let network_data_result = get_network_data_from_path(&path); + let network_data_result = get_network_data_from_path(path); assert!(network_data_result.is_err()); } @@ -216,12 +216,12 @@ mod tests { #[allow(clippy::float_cmp)] fn test_get_cpu_data() { let path = "./tests/proc/stat/valid_stat"; - let cpu_data_result = get_cpu_data_from_path(&path); - assert!(!cpu_data_result.is_err()); + let cpu_data_result = get_cpu_data_from_path(path); + assert!(cpu_data_result.is_ok()); let cpu_data = cpu_data_result.unwrap(); assert_eq!(cpu_data.total_user_time_ms, 23370.0); assert_eq!(cpu_data.total_system_time_ms, 1880.0); - assert_eq!(cpu_data.total_idle_time_ms, 178380.0); + assert_eq!(cpu_data.total_idle_time_ms, 178_380.0); assert_eq!(cpu_data.individual_cpu_idle_times.len(), 2); assert_eq!( *cpu_data @@ -239,27 +239,27 @@ mod tests { ); let path = "./tests/proc/stat/invalid_stat_non_numerical_value_1"; - let cpu_data_result = get_cpu_data_from_path(&path); + let cpu_data_result = get_cpu_data_from_path(path); assert!(cpu_data_result.is_err()); let path = "./tests/proc/stat/invalid_stat_non_numerical_value_2"; - let cpu_data_result = get_cpu_data_from_path(&path); + let cpu_data_result = get_cpu_data_from_path(path); 
assert!(cpu_data_result.is_err()); let path = "./tests/proc/stat/invalid_stat_malformed_first_line"; - let cpu_data_result = get_cpu_data_from_path(&path); + let cpu_data_result = get_cpu_data_from_path(path); assert!(cpu_data_result.is_err()); let path = "./tests/proc/stat/invalid_stat_malformed_per_cpu_line"; - let cpu_data_result = get_cpu_data_from_path(&path); + let cpu_data_result = get_cpu_data_from_path(path); assert!(cpu_data_result.is_err()); let path = "./tests/proc/stat/invalid_stat_missing_cpun_data"; - let cpu_data_result = get_cpu_data_from_path(&path); + let cpu_data_result = get_cpu_data_from_path(path); assert!(cpu_data_result.is_err()); let path = "./tests/proc/stat/nonexistent_stat"; - let cpu_data_result = get_cpu_data_from_path(&path); + let cpu_data_result = get_cpu_data_from_path(path); assert!(cpu_data_result.is_err()); } @@ -267,21 +267,21 @@ mod tests { #[allow(clippy::float_cmp)] fn test_get_uptime_data() { let path = "./tests/proc/uptime/valid_uptime"; - let uptime_data_result = get_uptime_from_path(&path); - assert!(!uptime_data_result.is_err()); + let uptime_data_result = get_uptime_from_path(path); + assert!(uptime_data_result.is_ok()); let uptime_data = uptime_data_result.unwrap(); - assert_eq!(uptime_data, 3213103123000.0); + assert_eq!(uptime_data, 3_213_103_123_000.0); let path = "./tests/proc/uptime/invalid_data_uptime"; - let uptime_data_result = get_uptime_from_path(&path); + let uptime_data_result = get_uptime_from_path(path); assert!(uptime_data_result.is_err()); let path = "./tests/proc/uptime/malformed_uptime"; - let uptime_data_result = get_uptime_from_path(&path); + let uptime_data_result = get_uptime_from_path(path); assert!(uptime_data_result.is_err()); let path = "./tests/proc/uptime/nonexistent_uptime"; - let uptime_data_result = get_uptime_from_path(&path); + let uptime_data_result = get_uptime_from_path(path); assert!(uptime_data_result.is_err()); } } diff --git a/bottlecap/src/traces/mod.rs 
b/bottlecap/src/traces/mod.rs index d8facd07c..9c87051cf 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -8,3 +8,24 @@ pub mod stats_processor; pub mod trace_agent; pub mod trace_flusher; pub mod trace_processor; + +// URL for a call to the Lambda runtime API. The value may be replaced if `AWS_LAMBDA_RUNTIME_API` is set. +const LAMBDA_RUNTIME_URL_PREFIX: &str = "http://127.0.0.1:9001"; + +// URL for a call from the Datadog Lambda Library to the Lambda Extension +const LAMBDA_EXTENSION_URL_PREFIX: &str = "http://127.0.0.1:8124"; + +// the first part of a URL for a call from Statsd +const LAMBDA_STATSD_URL_PREFIX: &str = "http://127.0.0.1:8125"; + +// the first part of a URL from the non-routable address for DNS traces +const DNS_NON_ROUTABLE_ADDRESS_URL_PREFIX: &str = "0.0.0.0"; + +// the first part of a URL from the localhost address for DNS traces +const DNS_LOCAL_HOST_ADDRESS_URL_PREFIX: &str = "127.0.0.1"; + +// URL from the `_AWS_XRAY_DAEMON_ADDRESS` for DNS traces +const AWS_XRAY_DAEMON_ADDRESS_URL_PREFIX: &str = "169.254.79.129"; + +// Name of the placeholder invocation span set by Java and Go tracers +const INVOCATION_SPAN_RESOURCE: &str = "dd-tracer-serverless-span"; diff --git a/bottlecap/src/traces/propagation/mod.rs b/bottlecap/src/traces/propagation/mod.rs index e25e9d35d..723666406 100644 --- a/bottlecap/src/traces/propagation/mod.rs +++ b/bottlecap/src/traces/propagation/mod.rs @@ -212,7 +212,7 @@ pub mod tests { use super::*; lazy_static! 
{ - static ref TRACE_ID: u128 = 171395628812617415352188477958425669623; + static ref TRACE_ID: u128 = 171_395_628_812_617_415_352_188_477_958_425_669_623; static ref TRACE_ID_LOWER_ORDER_BITS: u64 = *TRACE_ID as u64; static ref TRACE_ID_HEX: String = String::from("80f198ee56343ba864fe8b2a57d3eff7"); @@ -365,7 +365,7 @@ pub mod tests { None, VALID_DATADOG_HEADERS.clone(), SpanContext { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, span_id: 5678, sampling: Some(Sampling { priority: Some(1), @@ -382,7 +382,7 @@ pub mod tests { None, VALID_DATADOG_HEADERS_NO_PRIORITY.clone(), SpanContext { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, span_id: 5678, sampling: Some(Sampling { priority: Some(2), @@ -404,7 +404,7 @@ pub mod tests { Some(vec![TracePropagationStyle::Datadog]), VALID_DATADOG_HEADERS.clone(), SpanContext { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, span_id: 5678, sampling: Some(Sampling { priority: Some(1), @@ -440,8 +440,8 @@ pub mod tests { Some(vec![TracePropagationStyle::TraceContext]), VALID_TRACECONTEXT_HEADERS_BASIC.clone(), SpanContext { - trace_id: 7277407061855694839, - span_id: 67667974448284343, + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, sampling: Some(Sampling { priority: Some(2), mechanism: None, @@ -460,8 +460,8 @@ pub mod tests { Some(vec![TracePropagationStyle::TraceContext]), VALID_TRACECONTEXT_HEADERS_RUM_NO_SAMPLING_DECISION.clone(), SpanContext { - trace_id: 7277407061855694839, - span_id: 67667974448284343, + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, sampling: Some(Sampling { priority: Some(0), mechanism: None, @@ -484,7 +484,7 @@ pub mod tests { None, ALL_VALID_HEADERS.clone(), SpanContext { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, span_id: 5678, sampling: Some(Sampling { priority: Some(1), @@ -496,9 +496,9 @@ pub mod tests { ]), links: vec![ SpanLink { - 
trace_id: 7277407061855694839, + trace_id: 7_277_407_061_855_694_839, trace_id_high: 0, - span_id: 67667974448284343, + span_id: 67_667_974_448_284_343, flags: 1, tracestate: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string(), attributes: HashMap::from([ @@ -513,7 +513,7 @@ pub mod tests { Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), ALL_VALID_HEADERS.clone(), SpanContext { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, span_id: 5678, sampling: Some(Sampling { priority: Some(1), @@ -525,9 +525,9 @@ pub mod tests { ]), links: vec![ SpanLink { - trace_id: 7277407061855694839, + trace_id: 7_277_407_061_855_694_839, trace_id_high: 0, - span_id: 67667974448284343, + span_id: 67_667_974_448_284_343, flags: 1, tracestate: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string(), attributes: HashMap::from([ @@ -543,7 +543,7 @@ pub mod tests { Some(vec![TracePropagationStyle::Datadog]), ALL_VALID_HEADERS.clone(), SpanContext { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, span_id: 5678, sampling: Some(Sampling { priority: Some(1), @@ -568,7 +568,7 @@ pub mod tests { Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::None]), ALL_VALID_HEADERS.clone(), SpanContext { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, span_id: 5678, sampling: Some(Sampling { priority: Some(1), @@ -591,8 +591,8 @@ pub mod tests { Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), DATADOG_TRACECONTEXT_MATCHING_TRACE_ID_HEADERS.clone(), SpanContext { - trace_id: 7277407061855694839, - span_id: 67667974448284343, + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, sampling: Some(Sampling { priority: Some(1), mechanism: None, @@ -612,7 +612,7 @@ pub mod tests { Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), NO_TRACESTATE_SUPPORT_NOT_MATCHING_TRACE_ID.clone(), 
SpanContext { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, span_id: 5678, sampling: Some(Sampling { priority: Some(1), @@ -624,9 +624,9 @@ pub mod tests { ]), links: vec![ SpanLink { - trace_id: 7277407061855694839, + trace_id: 7_277_407_061_855_694_839, trace_id_high: 0, - span_id: 67667974448284343, + span_id: 67_667_974_448_284_343, flags: 0, tracestate: "dd=o:rum".to_string(), attributes: HashMap::from([ @@ -653,7 +653,7 @@ pub mod tests { ("traceparent".to_string(), "00-000000000000000080f198ee56343ba8-000000000000000a-01".to_string()), ]), SpanContext { - trace_id: 9291375655657946024, + trace_id: 9_291_375_655_657_946_024, span_id: 10, sampling: Some(Sampling { priority: Some(2), @@ -672,8 +672,8 @@ pub mod tests { Some(vec![TracePropagationStyle::TraceContext, TracePropagationStyle::Datadog]), ALL_HEADERS_CHAOTIC_2.clone(), SpanContext { - trace_id: 7277407061855694839, - span_id: 67667974448284343, + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, sampling: Some(Sampling { priority: Some(2), mechanism: None, @@ -688,11 +688,11 @@ pub mod tests { ]), links: vec![ SpanLink { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, trace_id_high: 0, span_id: 5678, flags: 1, - tracestate: "".to_string(), + tracestate: String::new(), attributes: HashMap::from([ ("reason".to_string(), "terminated_context".to_string()), ("context_headers".to_string(), "datadog".to_string()), @@ -705,8 +705,8 @@ pub mod tests { Some(vec![TracePropagationStyle::TraceContext, TracePropagationStyle::Datadog]), ALL_VALID_HEADERS.clone(), SpanContext { - trace_id: 7277407061855694839, - span_id: 67667974448284343, + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, sampling: Some(Sampling { priority: Some(2), mechanism: None, @@ -721,11 +721,11 @@ pub mod tests { ]), links: vec![ SpanLink { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, trace_id_high: 0, span_id: 5678, 
flags: 1, - tracestate: "".to_string(), + tracestate: String::new(), attributes: HashMap::from([ ("reason".to_string(), "terminated_context".to_string()), ("context_headers".to_string(), "datadog".to_string()), @@ -739,7 +739,7 @@ pub mod tests { Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), ALL_VALID_HEADERS.clone(), SpanContext { - trace_id: 13088165645273925489, + trace_id: 13_088_165_645_273_925_489, span_id: 5678, sampling: Some(Sampling { priority: Some(1), @@ -751,11 +751,11 @@ pub mod tests { ]), links: vec![ SpanLink { - trace_id: 7277407061855694839, + trace_id: 7_277_407_061_855_694_839, // this should be `9291375655657946024` not `0`, but we don't have this data // with the current definition of `SpanContext` trace_id_high: 0, - span_id: 67667974448284343, + span_id: 67_667_974_448_284_343, flags: 1, tracestate: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string(), attributes: HashMap::from([ diff --git a/bottlecap/src/traces/propagation/text_map_propagator.rs b/bottlecap/src/traces/propagation/text_map_propagator.rs index 1a0803aac..34b482643 100644 --- a/bottlecap/src/traces/propagation/text_map_propagator.rs +++ b/bottlecap/src/traces/propagation/text_map_propagator.rs @@ -505,8 +505,8 @@ mod test { .extract(&headers) .expect("couldn't extract trace context"); - assert_eq!(context.trace_id, 7277407061855694839); - assert_eq!(context.span_id, 67667974448284343); + assert_eq!(context.trace_id, 7_277_407_061_855_694_839); + assert_eq!(context.span_id, 67_667_974_448_284_343); assert_eq!(context.sampling.unwrap().priority, Some(2)); assert_eq!(context.origin, Some("rum".to_string())); assert_eq!( diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 90f41d8cd..5974c7d17 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -14,7 +14,13 @@ use std::sync::Arc; use tracing::debug; use crate::config; +use 
crate::traces::{ + AWS_XRAY_DAEMON_ADDRESS_URL_PREFIX, DNS_LOCAL_HOST_ADDRESS_URL_PREFIX, + DNS_NON_ROUTABLE_ADDRESS_URL_PREFIX, INVOCATION_SPAN_RESOURCE, LAMBDA_EXTENSION_URL_PREFIX, + LAMBDA_RUNTIME_URL_PREFIX, LAMBDA_STATSD_URL_PREFIX, +}; use datadog_trace_obfuscation::obfuscate::obfuscate_span; +use datadog_trace_protobuf::pb::Span; use datadog_trace_utils::trace_utils::SendData; use datadog_trace_utils::trace_utils::{self}; @@ -31,11 +37,10 @@ struct ChunkProcessor { } impl TraceChunkProcessor for ChunkProcessor { - fn process(&mut self, chunk: &mut datadog_trace_protobuf::pb::TraceChunk, _index: usize) { - chunk.spans.retain(|span| { - (span.resource != "127.0.0.1" || span.resource != "0.0.0.0") - && span.name != "dns.lookup" - }); + fn process(&mut self, chunk: &mut pb::TraceChunk, _index: usize) { + chunk + .spans + .retain(|span| !filter_span_from_lambda_library_or_runtime(span)); for span in &mut chunk.spans { self.tags_provider.get_tags_map().iter().for_each(|(k, v)| { span.meta.insert(k.clone(), v.clone()); @@ -49,6 +54,53 @@ impl TraceChunkProcessor for ChunkProcessor { } } +fn filter_span_from_lambda_library_or_runtime(span: &Span) -> bool { + if let Some(url) = span.meta.get("http.url") { + if url.starts_with(LAMBDA_RUNTIME_URL_PREFIX) + || url.starts_with(LAMBDA_EXTENSION_URL_PREFIX) + || url.starts_with(LAMBDA_STATSD_URL_PREFIX) + { + return true; + } + } + + if let (Some(tcp_host), Some(tcp_port)) = ( + span.meta.get("tcp.remote.host"), + span.meta.get("tcp.remote.port"), + ) { + { + let tcp_lambda_url_prefix = format!("http://{tcp_host}:{tcp_port}"); + if tcp_lambda_url_prefix.starts_with(LAMBDA_RUNTIME_URL_PREFIX) + || tcp_lambda_url_prefix.starts_with(LAMBDA_EXTENSION_URL_PREFIX) + || tcp_lambda_url_prefix.starts_with(LAMBDA_STATSD_URL_PREFIX) + { + return true; + } + } + } + + if let Some(dns_address) = span.meta.get("dns.address") { + if dns_address.starts_with(DNS_NON_ROUTABLE_ADDRESS_URL_PREFIX) + || 
dns_address.starts_with(DNS_LOCAL_HOST_ADDRESS_URL_PREFIX) + || dns_address.starts_with(AWS_XRAY_DAEMON_ADDRESS_URL_PREFIX) + { + return true; + } + } + if span.resource == INVOCATION_SPAN_RESOURCE { + return true; + } + + if span.name == "dns.lookup" + || span.resource == DNS_LOCAL_HOST_ADDRESS_URL_PREFIX + || span.resource == DNS_NON_ROUTABLE_ADDRESS_URL_PREFIX + { + return true; + } + + false +} + #[allow(clippy::module_name_repetitions)] pub trait TraceProcessor { fn process_traces( From 920f2b9b6f27967e878d47cff16d94eb08919621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:13:38 -0500 Subject: [PATCH 18/41] feat(bottlecap): add SNS inferred spans (#437) * wip: sqs * feat: sqs tests * invert duration check * remove duration set * fmt and add `test_get_arn` * remove unneeded reference * remove unneeded comments * add `get_carrier` implementation for `SqsRecord` * add trace context to `sqs_event.json` * fix: resource_names is not needed * fix: don't deserialize body * avoid `use super::...` * fix unit tests * set carrier and trigger tags * remove duplicate tag * fmt * pass headers to `on_invocation_end` * infer first, then extract or else theres nothing to extract, reset values also for next inferr, no need to keep state after we complete * reset values on every infer * add `sns_event.rs` * add `sns_event*.json` payloads * add `base64_to_string` method and also move some variables * surrender resource * use `SnsRecord` for inferred spans * move some constants * add missing trigger tags * missed one case * update unit tests * update `tt` to `t.get_tags()` * fmt * typo --------- Co-authored-by: AJ Stuyvenberg --- .../src/lifecycle/invocation/span_inferrer.rs | 16 +- .../src/lifecycle/invocation/triggers/mod.rs | 10 + .../invocation/triggers/sns_event.rs | 317 ++++++++++++++++++ .../invocation/triggers/sqs_event.rs | 2 +- bottlecap/tests/payloads/sns_event.json | 50 +++ 
.../tests/payloads/sns_event_binary.json | 27 ++ 6 files changed, 420 insertions(+), 2 deletions(-) create mode 100644 bottlecap/src/lifecycle/invocation/triggers/sns_event.rs create mode 100644 bottlecap/tests/payloads/sns_event.json create mode 100644 bottlecap/tests/payloads/sns_event_binary.json diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 4ad90dffa..bc6ac7eac 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -9,7 +9,7 @@ use crate::config::AwsConfig; use crate::lifecycle::invocation::triggers::{ api_gateway_http_event::APIGatewayHttpEvent, api_gateway_rest_event::APIGatewayRestEvent, - sqs_event::SqsRecord, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, + sns_event::SnsRecord, sqs_event::SqsRecord, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, }; use crate::tags::lambda::tags::{INIT_TYPE, SNAP_START_VALUE}; @@ -104,6 +104,20 @@ impl SpanInferrer { self.is_async_span = t.is_async(); self.inferred_span = Some(span); } + } else if SnsRecord::is_match(payload_value) { + if let Some(t) = SnsRecord::new(payload_value.clone()) { + let mut span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + + t.enrich_span(&mut span); + + self.carrier = Some(t.get_carrier()); + self.trigger_tags = Some(t.get_tags()); + self.is_async_span = t.is_async(); + self.inferred_span = Some(span); + } } else { debug!("Unable to infer span from payload: no matching trigger found"); } diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index f65a9155a..863fb4019 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -1,11 +1,13 @@ use std::{collections::HashMap, hash::BuildHasher}; +use base64::{engine::general_purpose, Engine}; use datadog_trace_protobuf::pb::Span; use 
serde::{ser::SerializeMap, Serializer}; use serde_json::Value; pub mod api_gateway_http_event; pub mod api_gateway_rest_event; +pub mod sns_event; pub mod sqs_event; pub const DATADOG_CARRIER_KEY: &str = "_datadog"; @@ -31,6 +33,14 @@ pub fn get_aws_partition_by_region(region: &str) -> String { } } +#[must_use] +pub fn base64_to_string(base64_string: &str) -> String { + let bytes = general_purpose::STANDARD + .decode(base64_string) + .unwrap_or_default(); + String::from_utf8_lossy(&bytes).to_string() +} + /// Serialize a `HashMap` with lowercase keys /// pub fn lowercase_key( diff --git a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs new file mode 100644 index 000000000..443a6ada9 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs @@ -0,0 +1,317 @@ +use std::collections::HashMap; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{base64_to_string, Trigger, DATADOG_CARRIER_KEY}, +}; + +use super::{FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct SnsEvent { + #[serde(rename = "Records")] + pub records: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct SnsRecord { + #[serde(rename = "Sns")] + pub sns: SnsEntity, + #[serde(rename = "EventSubscriptionArn")] + pub event_subscription_arn: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct SnsEntity { + #[serde(rename = "MessageId")] + pub message_id: String, + #[serde(rename = "Type")] + pub r#type: String, + #[serde(rename = "TopicArn")] + pub topic_arn: String, + #[serde(rename = "MessageAttributes")] + pub message_attributes: HashMap, + #[serde(rename = "Timestamp")] + pub timestamp: DateTime, + #[serde(rename = "Subject")] + 
pub subject: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MessageAttribute { + #[serde(rename = "Type")] + pub r#type: String, + #[serde(rename = "Value")] + pub value: String, +} + +impl Trigger for SnsRecord { + fn new(payload: serde_json::Value) -> Option { + match payload.get("Records").and_then(Value::as_array) { + Some(records) => match serde_json::from_value::(records[0].clone()) { + Ok(record) => Some(record), + Err(e) => { + debug!("Failed to deserialize SNS Record: {e}"); + None + } + }, + None => None, + } + } + + fn is_match(payload: &serde_json::Value) -> bool { + if let Some(first_record) = payload + .get("Records") + .and_then(Value::as_array) + .and_then(|r| r.first()) + .take() + { + return first_record.get("Sns").is_some(); + } + + false + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut datadog_trace_protobuf::pb::Span) { + debug!("Enriching an Inferred Span for an SNS Event"); + let resource = self + .sns + .topic_arn + .clone() + .split(':') + .last() + .unwrap_or_default() + .to_string(); + + let start_time = self + .sns + .timestamp + .timestamp_nanos_opt() + .unwrap_or((self.sns.timestamp.timestamp_millis() as f64 * MS_TO_NS) as i64); + // todo: service mapping + let service_name = "sns".to_string(); + + span.name = "aws.sns".to_string(); + span.service = service_name.to_string(); + span.resource.clone_from(&resource); + span.r#type = "web".to_string(); + span.start = start_time; + span.meta.extend([ + ("operation_name".to_string(), "aws.sns".to_string()), + ("topicname".to_string(), resource), + ("topic_arn".to_string(), self.sns.topic_arn.clone()), + ("message_id".to_string(), self.sns.message_id.clone()), + ("type".to_string(), self.sns.r#type.clone()), + ]); + + if let Some(subject) = &self.sns.subject { + span.meta.insert("subject".to_string(), subject.clone()); + } + + if let Some(event_subscription_arn) = &self.event_subscription_arn { + span.meta.insert( + 
"event_subscription_arn".to_string(), + event_subscription_arn.clone(), + ); + } + } + + fn get_tags(&self) -> HashMap { + HashMap::from([ + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "sns".to_string(), + ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + self.sns.topic_arn.clone(), + ), + ]) + } + + fn get_arn(&self, _region: &str) -> String { + self.sns.topic_arn.clone() + } + + fn get_carrier(&self) -> HashMap { + let carrier = HashMap::new(); + if let Some(ma) = self.sns.message_attributes.get(DATADOG_CARRIER_KEY) { + match ma.r#type.as_str() { + "String" => return serde_json::from_str(&ma.value).unwrap_or_default(), + "Binary" => { + return serde_json::from_str(&base64_to_string(&ma.value)).unwrap_or_default() + } + _ => { + debug!("Unsupported type in SNS message attribute"); + } + } + } + + carrier + } + + fn is_async(&self) -> bool { + true + } +} + +#[cfg(test)] +mod tests { + use datadog_trace_protobuf::pb::Span; + + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = SnsRecord::new(payload).expect("Failed to deserialize into SnsRecord"); + + let message_attributes = HashMap::::from([ + ("_datadog".to_string(), MessageAttribute { + r#type: "String".to_string(), + value: "{\"x-datadog-trace-id\": \"4948377316357291421\", \"x-datadog-parent-id\": \"6746998015037429512\", \"x-datadog-sampling-priority\": \"1\"}".to_string(), + }) + ]); + + let expected = SnsRecord { + event_subscription_arn: Some("arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04".to_string()), + sns: SnsEntity { + message_id: "87056a47-f506-5d77-908b-303605d3b197".to_string(), + r#type: "Notification".to_string(), + topic_arn: "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy" + .to_string(), + 
message_attributes, + timestamp: DateTime::parse_from_rfc3339("2022-01-31T14:13:41.637Z") + .unwrap() + .with_timezone(&Utc), + subject: None, + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SnsRecord"); + + assert!(SnsRecord::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!SnsRecord::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.sns"); + assert_eq!(span.service, "sns"); + assert_eq!(span.resource, "serverlessTracingTopicPy"); + assert_eq!(span.r#type, "web"); + + assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), "aws.sns".to_string()), + ("topicname".to_string(), "serverlessTracingTopicPy".to_string()), + ("topic_arn".to_string(), "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy".to_string()), + ("message_id".to_string(), "87056a47-f506-5d77-908b-303605d3b197".to_string()), + ("type".to_string(), "Notification".to_string()), + ("event_subscription_arn".to_string(), "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04".to_string()) + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let tags = event.get_tags(); + + let expected = 
HashMap::from([ + ( + "function_trigger.event_source".to_string(), + "sns".to_string(), + ), + ( + "function_trigger.event_source_arn".to_string(), + "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy".to_string(), + ), + ]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy" + ); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "4948377316357291421".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "6746998015037429512".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_get_carrier_from_binary_value() { + let json = read_json_file("sns_event_binary.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "4948377316357291421".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "6746998015037429512".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs 
index 0daed4385..47c4a6069 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -115,7 +115,7 @@ impl Trigger for SqsRecord { span.name = "aws.sqs".to_string(); span.service = service_name.to_string(); - span.resource.clone_from(&resource); + span.resource = resource; span.r#type = "web".to_string(); span.start = start_time; span.meta.extend(HashMap::from([ diff --git a/bottlecap/tests/payloads/sns_event.json b/bottlecap/tests/payloads/sns_event.json new file mode 100644 index 000000000..ef8062a0e --- /dev/null +++ b/bottlecap/tests/payloads/sns_event.json @@ -0,0 +1,50 @@ +{ + "Records": [ + { + "EventSource": "aws:sns", + "EventVersion": "1.0", + "EventSubscriptionArn": "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "Sns": { + "Type": "Notification", + "MessageId": "87056a47-f506-5d77-908b-303605d3b197", + "TopicArn": "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy", + "Subject": null, + "Message": "Asynchronously invoking a Lambda function with SNS.", + "Timestamp": "2022-01-31T14:13:41.637Z", + "SignatureVersion": "1", + "Signature": "BmwnJb0Ku2KgQef9QOgaSSTwLyUsbkRq90lzD5Vn4mAcRUOq2ForfMOYbxMB6idljWIWy9t/jK4AIMxPGk/eOGiRcENx3BvAcGcoDayBRFY13+xUGaPn5Lfoht/ZJ7/hmCgFWKRa8ooATZL+AwGAw6Id8qzf0R3M3k2asy5Vxa4ODKiFW9OzWY/zFgsYJhddR3JrQl9YOMRyIobNNHT96o1TwjGsSUTEemrxA6jQtb3QbardEKO+2SuataLEZki7gE2D2sA300WqZecumI339q7la+OIj6VDGDwFoppE2sh8hzJYXAH7oo11giwltE0V3/eLFCVhsE8Y1KD/yDPPsA==", + "SigningCertUrl": "https://sns.sa-east-1.amazonaws.com/SimpleNotificationService-7ff5318490ec183fbaddaa2a969abfda.pem", + "UnsubscribeUrl": "https://sns.sa-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "MessageAttributes": { + "_datadog": { + "Type": "String", + "Value": "{\"x-datadog-trace-id\": \"4948377316357291421\", 
\"x-datadog-parent-id\": \"6746998015037429512\", \"x-datadog-sampling-priority\": \"1\"}" + } + } + } + }, + { + "EventSource": "aws:sns", + "EventVersion": "1.0", + "EventSubscriptionArn": "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "Sns": { + "Type": "Notification", + "MessageId": "87056a47-f506-5d77-908b-303605d3b197", + "TopicArn": "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy", + "Subject": null, + "Message": "Asynchronously invoking a Lambda function with SNS.", + "Timestamp": "2022-01-31T14:13:41.637Z", + "SignatureVersion": "1", + "Signature": "BmwnJb0Ku2KgQef9QOgaSSTwLyUsbkRq90lzD5Vn4mAcRUOq2ForfMOYbxMB6idljWIWy9t/jK4AIMxPGk/eOGiRcENx3BvAcGcoDayBRFY13+xUGaPn5Lfoht/ZJ7/hmCgFWKRa8ooATZL+AwGAw6Id8qzf0R3M3k2asy5Vxa4ODKiFW9OzWY/zFgsYJhddR3JrQl9YOMRyIobNNHT96o1TwjGsSUTEemrxA6jQtb3QbardEKO+2SuataLEZki7gE2D2sA300WqZecumI339q7la+OIj6VDGDwFoppE2sh8hzJYXAH7oo11giwltE0V3/eLFCVhsE8Y1KD/yDPPsA==", + "SigningCertUrl": "https://sns.sa-east-1.amazonaws.com/SimpleNotificationService-7ff5318490ec183fbaddaa2a969abfda.pem", + "UnsubscribeUrl": "https://sns.sa-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "MessageAttributes": { + "_datadog": { + "Type": "String", + "Value": "{\"x-datadog-trace-id\": \"4948377316357291421\", \"x-datadog-parent-id\": \"6746998015037429512\", \"x-datadog-sampling-priority\": \"1\"}" + } + } + } + } + ] +} diff --git a/bottlecap/tests/payloads/sns_event_binary.json b/bottlecap/tests/payloads/sns_event_binary.json new file mode 100644 index 000000000..4a9a2b500 --- /dev/null +++ b/bottlecap/tests/payloads/sns_event_binary.json @@ -0,0 +1,27 @@ +{ + "Records": [ + { + "EventSource": "aws:sns", + "EventVersion": "1.0", + "EventSubscriptionArn": "arn:aws:sns:eu-west-1:601427279990:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "Sns": { + "Type": 
"Notification", + "MessageId": "87056a47-f506-5d77-908b-303605d3b197", + "TopicArn": "arn:aws:sns:eu-west-1:601427279990:serverlessTracingTopicPy", + "Subject": null, + "Message": "Asynchronously invoking a Lambda function with SNS.", + "Timestamp": "2022-01-31T14:13:41.637Z", + "SignatureVersion": "1", + "Signature": "BmwnJb0Ku2KgQef9QOgaSSTwLyUsbkRq90lzD5Vn4mAcRUOq2ForfMOYbxMB6idljWIWy9t/jK4AIMxPGk/eOGiRcENx3BvAcGcoDayBRFY13+xUGaPn5Lfoht/ZJ7/hmCgFWKRa8ooATZL+AwGAw6Id8qzf0R3M3k2asy5Vxa4ODKiFW9OzWY/zFgsYJhddR3JrQl9YOMRyIobNNHT96o1TwjGsSUTEemrxA6jQtb3QbardEKO+2SuataLEZki7gE2D2sA300WqZecumI339q7la+OIj6VDGDwFoppE2sh8hzJYXAH7oo11giwltE0V3/eLFCVhsE8Y1KD/yDPPsA==", + "SigningCertUrl": "https://sns.eu-west-1.amazonaws.com/SimpleNotificationService-7ff5318490ec183fbaddaa2a969abfda.pem", + "UnsubscribeUrl": "https://sns.eu-west-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:eu-west-1:601427279990:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "MessageAttributes": { + "_datadog": { + "Type": "Binary", + "Value": "eyJ4LWRhdGFkb2ctdHJhY2UtaWQiOiI0OTQ4Mzc3MzE2MzU3MjkxNDIxIiwieC1kYXRhZG9nLXBhcmVudC1pZCI6IjY3NDY5OTgwMTUwMzc0Mjk1MTIiLCJ4LWRhdGFkb2ctc2FtcGxpbmctcHJpb3JpdHkiOiIxIn0=" + } + } + } + } + ] +} From 113a75927a09a51ee6bab47fed137fdec0855671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:21:13 -0500 Subject: [PATCH 19/41] feat(bottlecap): allow wrapped inferred spans + add SNS-SQS inferred spans (#440) * wip: sqs * feat: sqs tests * invert duration check * remove duration set * fmt and add `test_get_arn` * remove unneeded reference * remove unneeded comments * add `get_carrier` implementation for `SqsRecord` * add trace context to `sqs_event.json` * fix: resource_names is not needed * fix: don't deserialize body * avoid `use super::...` * fix unit tests * set carrier and trigger tags * remove duplicate tag * fmt * pass headers to 
`on_invocation_end` * infer first, then extract or else theres nothing to extract, reset values also for next inferr, no need to keep state after we complete * reset values on every infer * add `sns_event.rs` * add `sns_event*.json` payloads * add `base64_to_string` method and also move some variables * surrender resource * use `SnsRecord` for inferred spans * move some constants * add missing trigger tags * missed one case * update unit tests * update `tt` to `t.get_tags()` * fmt * typo * update tags * SQS event can contain SNS carrier * make some `Trigger` methods to be `Sized` * add `sns_sqs_event.json` also update path * account for wrapped inferred span in processor * simplify code in `span_inferrer.rs` * remove duplicated condition --------- Co-authored-by: AJ Stuyvenberg --- .../src/lifecycle/invocation/processor.rs | 8 +- .../src/lifecycle/invocation/span_inferrer.rs | 145 ++++++++++-------- .../src/lifecycle/invocation/triggers/mod.rs | 10 +- .../invocation/triggers/sns_event.rs | 32 ++-- .../invocation/triggers/sqs_event.rs | 42 ++++- bottlecap/tests/payloads/sns_sqs_event.json | 20 +++ 6 files changed, 165 insertions(+), 92 deletions(-) create mode 100644 bottlecap/tests/payloads/sns_sqs_event.json diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index d6679bc2f..e37789202 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -149,16 +149,22 @@ impl Processor { self.span.meta.extend(trigger_tags); } - self.inferrer.complete_inferred_span(&self.span); + self.inferrer.complete_inferred_spans(&self.span); if self.tracer_detected { let mut body_size = std::mem::size_of_val(&self.span); let mut traces = vec![self.span.clone()]; + if let Some(inferred_span) = &self.inferrer.inferred_span { body_size += std::mem::size_of_val(inferred_span); traces.push(inferred_span.clone()); } + if let Some(ws) = &self.inferrer.wrapped_inferred_span { + 
body_size += std::mem::size_of_val(ws); + traces.push(ws.clone()); + } + // todo: figure out what to do here let header_tags = tracer_header_tags::TracerHeaderTags { lang: "", diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index bc6ac7eac..6c84cbe70 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -8,13 +8,17 @@ use tracing::debug; use crate::config::AwsConfig; use crate::lifecycle::invocation::triggers::{ - api_gateway_http_event::APIGatewayHttpEvent, api_gateway_rest_event::APIGatewayRestEvent, - sns_event::SnsRecord, sqs_event::SqsRecord, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, + api_gateway_http_event::APIGatewayHttpEvent, + api_gateway_rest_event::APIGatewayRestEvent, + sns_event::{SnsEntity, SnsRecord}, + sqs_event::SqsRecord, + Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, }; use crate::tags::lambda::tags::{INIT_TYPE, SNAP_START_VALUE}; pub struct SpanInferrer { pub inferred_span: Option, + pub wrapped_inferred_span: Option, is_async_span: bool, carrier: Option>, trigger_tags: Option>, @@ -31,6 +35,7 @@ impl SpanInferrer { pub fn new() -> Self { Self { inferred_span: None, + wrapped_inferred_span: None, is_async_span: false, carrier: None, trigger_tags: None, @@ -43,84 +48,79 @@ impl SpanInferrer { /// pub fn infer_span(&mut self, payload_value: &Value, aws_config: &AwsConfig) { self.inferred_span = None; + self.wrapped_inferred_span = None; self.is_async_span = false; self.carrier = None; self.trigger_tags = None; + let mut trigger: Option> = None; + let mut inferred_span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + if APIGatewayHttpEvent::is_match(payload_value) { if let Some(t) = APIGatewayHttpEvent::new(payload_value.clone()) { - let mut span = Span { - span_id: Self::generate_span_id(), - ..Default::default() - }; - - t.enrich_span(&mut span); - let mut tt = 
t.get_tags(); - tt.extend([( - FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), - t.get_arn(&aws_config.region), - )]); - - self.carrier = Some(t.get_carrier()); - self.trigger_tags = Some(tt); - self.is_async_span = t.is_async(); - self.inferred_span = Some(span); + t.enrich_span(&mut inferred_span); + + trigger = Some(Box::new(t)); } } else if APIGatewayRestEvent::is_match(payload_value) { if let Some(t) = APIGatewayRestEvent::new(payload_value.clone()) { - let mut span = Span { - span_id: Self::generate_span_id(), - ..Default::default() - }; - - t.enrich_span(&mut span); - let mut tt = t.get_tags(); - tt.extend([( - FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), - t.get_arn(&aws_config.region), - )]); - - self.carrier = Some(t.get_carrier()); - self.trigger_tags = Some(tt); - self.is_async_span = t.is_async(); - self.inferred_span = Some(span); + t.enrich_span(&mut inferred_span); + + trigger = Some(Box::new(t)); } } else if SqsRecord::is_match(payload_value) { if let Some(t) = SqsRecord::new(payload_value.clone()) { - let mut span = Span { - span_id: Self::generate_span_id(), - ..Default::default() - }; - - t.enrich_span(&mut span); - let mut tt = t.get_tags(); - tt.extend([( - FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), - t.get_arn(&aws_config.region), - )]); - - self.carrier = Some(t.get_carrier()); - self.trigger_tags = Some(tt); - self.is_async_span = t.is_async(); - self.inferred_span = Some(span); + t.enrich_span(&mut inferred_span); + + // Check for SNS event wrapped in the SQS body + if let Ok(sns_entity) = serde_json::from_str::(&t.body) { + debug!("Found an SNS event wrapped in the SQS body"); + let mut wrapped_inferred_span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + + let wt = SnsRecord { + sns: sns_entity, + event_subscription_arn: None, + }; + wt.enrich_span(&mut wrapped_inferred_span); + inferred_span.meta.extend(wt.get_tags()); + + wrapped_inferred_span.duration = + inferred_span.start - 
wrapped_inferred_span.start; + + self.wrapped_inferred_span = Some(wrapped_inferred_span); + } + + trigger = Some(Box::new(t)); } } else if SnsRecord::is_match(payload_value) { if let Some(t) = SnsRecord::new(payload_value.clone()) { - let mut span = Span { - span_id: Self::generate_span_id(), - ..Default::default() - }; - - t.enrich_span(&mut span); + t.enrich_span(&mut inferred_span); - self.carrier = Some(t.get_carrier()); - self.trigger_tags = Some(t.get_tags()); - self.is_async_span = t.is_async(); - self.inferred_span = Some(span); + trigger = Some(Box::new(t)); } } else { debug!("Unable to infer span from payload: no matching trigger found"); } + + // Inferred a trigger + if let Some(t) = trigger { + let mut trigger_tags = t.get_tags(); + trigger_tags.extend([( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + t.get_arn(&aws_config.region), + )]); + + self.trigger_tags = Some(trigger_tags); + self.carrier = Some(t.get_carrier()); + self.is_async_span = t.is_async(); + self.inferred_span = Some(inferred_span); + } } /// If a `self.inferred_span` exist, set the `parent_id` to @@ -144,9 +144,30 @@ impl SpanInferrer { } } - // TODO add status tag and other info from response - pub fn complete_inferred_span(&mut self, invocation_span: &Span) { + // TODO: add status tag and other info from response + // TODO: add peer.service + pub fn complete_inferred_spans(&mut self, invocation_span: &Span) { if let Some(s) = &mut self.inferred_span { + if let Some(ws) = &mut self.wrapped_inferred_span { + // Set correct Parent ID for multiple inferred spans + ws.parent_id = s.parent_id; + s.parent_id = ws.span_id; + + // TODO: clean this logic + if self.is_async_span { + // SNS to SQS span duration will be set + if ws.duration == 0 { + let duration = s.start - ws.start; + ws.duration = duration; + } + } else { + let duration = s.start - ws.start; + ws.duration = duration; + } + + ws.trace_id = invocation_span.trace_id; + } + if self.is_async_span { // SNS to SQS span 
duration will be set if s.duration == 0 { diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index 863fb4019..6edbe5c07 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -14,9 +14,13 @@ pub const DATADOG_CARRIER_KEY: &str = "_datadog"; pub const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; pub const FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG: &str = "function_trigger.event_source_arn"; -pub trait Trigger: Sized { - fn new(payload: Value) -> Option; - fn is_match(payload: &Value) -> bool; +pub trait Trigger { + fn new(payload: Value) -> Option + where + Self: Sized; + fn is_match(payload: &Value) -> bool + where + Self: Sized; fn enrich_span(&self, span: &mut Span); fn get_tags(&self) -> HashMap; fn get_arn(&self, region: &str) -> String; diff --git a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs index 443a6ada9..cbf313ff6 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs @@ -7,11 +7,9 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::MS_TO_NS, - triggers::{base64_to_string, Trigger, DATADOG_CARRIER_KEY}, + triggers::{base64_to_string, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, }; -use super::{FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}; - #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct SnsEvent { #[serde(rename = "Records")] @@ -123,16 +121,10 @@ impl Trigger for SnsRecord { } fn get_tags(&self) -> HashMap { - HashMap::from([ - ( - FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), - "sns".to_string(), - ), - ( - FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), - self.sns.topic_arn.clone(), - ), - ]) + HashMap::from([( + 
FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "sns".to_string(), + )]) } fn get_arn(&self, _region: &str) -> String { @@ -246,16 +238,10 @@ mod tests { let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); let tags = event.get_tags(); - let expected = HashMap::from([ - ( - "function_trigger.event_source".to_string(), - "sns".to_string(), - ), - ( - "function_trigger.event_source_arn".to_string(), - "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy".to_string(), - ), - ]); + let expected = HashMap::from([( + "function_trigger.event_source".to_string(), + "sns".to_string(), + )]); assert_eq!(tags, expected); } diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs index 47c4a6069..4f804c3de 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -7,8 +7,9 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::MS_TO_NS, triggers::{ - get_aws_partition_by_region, Trigger, DATADOG_CARRIER_KEY, - FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + get_aws_partition_by_region, + sns_event::{SnsEntity, SnsRecord}, + Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, }, }; @@ -35,6 +36,7 @@ pub struct SqsRecord { pub event_source_arn: String, #[serde(rename = "awsRegion")] pub aws_region: String, + pub body: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -178,8 +180,19 @@ impl Trigger for SqsRecord { } } - // TODO: AWSTraceHeader + // TODO: Check for EventBridge event sent through SQS + + // Check for SNS event sent through SQS + if let Ok(sns_entity) = serde_json::from_str::(&self.body) { + let sns_record = SnsRecord { + sns: sns_entity, + event_subscription_arn: None, + }; + + return sns_record.get_carrier(); + } + // TODO: AWSTraceHeader carrier } } @@ -219,6 +232,7 @@ mod tests { event_source: "aws:sqs".to_string(), event_source_arn: 
"arn:aws:sqs:us-east-1:123456789012:MyQueue".to_string(), aws_region: "us-east-1".to_string(), + body: "Hello from SQS!".to_string(), }; assert_eq!(result, expected); @@ -326,4 +340,26 @@ mod tests { assert_eq!(carrier, expected); } + + #[test] + fn test_get_carrier_from_sns() { + let json = read_json_file("sns_sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "2776434475358637757".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "4493917105238181843".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } } diff --git a/bottlecap/tests/payloads/sns_sqs_event.json b/bottlecap/tests/payloads/sns_sqs_event.json new file mode 100644 index 000000000..c1746d8fb --- /dev/null +++ b/bottlecap/tests/payloads/sns_sqs_event.json @@ -0,0 +1,20 @@ +{ + "Records": [ + { + "messageId": "64812b68-4d9b-4dca-b3fb-9b18f255ee51", + "receiptHandle": "AQEBER6aRkfG8092GvkL7FRwCwbQ7LLDW9Tlk/CembqHe+suS2kfFxXiukomvaIN61QoyQMoRgWuV52SDkiQno2u+5hP64BDbmw+e/KR9ayvIfHJ3M6RfyQLaWNWm3hDFBCKTnBMVIxtdx0N9epZZewyokjKcrNYtmCghFgTCvZzsQkowi5rnoHAVHJ3je1c3bDnQ1KLrZFgajDnootYXDwEPuMq5FIxrf4EzTe0S7S+rnRm+GaQfeBLBVAY6dASL9usV3/AFRqDtaI7GKI+0F2NCgLlqj49VlPRz4ldhkGknYlKTZTluAqALWLJS62/J1GQo53Cs3nneJcmu5ajB2zzmhhRXoXINEkLhCD5ujZfcsw9H4xqW69Or4ECvlqx14bUU2rtMIW0QM2p7pEeXnyocymQv6m1te113eYWTVmaJ4I=", + "body": "{\n \"Type\" : \"Notification\",\n \"MessageId\" : \"0a0ab23e-4861-5447-82b7-e8094ff3e332\",\n \"TopicArn\" : \"arn:aws:sns:eu-west-1:601427279990:js-library-test-dev-demoTopic-15WGUVRCBMPAA\",\n \"Message\" : \"{\\\"hello\\\":\\\"harv\\\",\\\"nice of you to join 
us\\\":\\\"david\\\",\\\"anotherThing\\\":{\\\"foo\\\":\\\"bar\\\",\\\"blah\\\":null,\\\"harv\\\":123},\\\"vals\\\":[{\\\"thingOne\\\":1},{\\\"thingTwo\\\":2}],\\\"ajTimestamp\\\":1639777617957}\",\n \"Timestamp\" : \"2021-12-17T21:46:58.040Z\",\n \"SignatureVersion\" : \"1\",\n \"Signature\" : \"FR35/7E8C3LHEVk/rC4XxXlXwV/5mNkFNPgDhHSnJ2I6hIoSrTROAm7h5xm1PuBkAeFDvq0zofw91ouk9zZyvhdrMLFIIgrjEyNayRmEffmoEAkzLFUsgtQX7MmTl644r4NuWiM0Oiz7jueRvIcKXcZr7Nc6GJcWV1ymec8oOmuHNMisnPMxI07LIQVYSyAfv6P9r2jEWMVIukRoCzwTnRk4bUUYhPSGHI7OC3AsxxXBbv8snqTrLM/4z2rXCf6jHCKNxWeLlm9/45PphCkEyx5BWS4/71KaoMWUWy8+6CCsy+uF3XTCVmvSEYLyEwTSzOY+vCUjazrRW93498i70g==\",\n \"SigningCertUrl\" : \"https://sns.eu-west-1.amazonaws.com/SimpleNotificationService-7ff5318490ec183fbaddaa2a969abfda.pem\",\n \"UnsubscribeUrl\" : \"https://sns.eu-west-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:eu-west-1:601427279990:js-library-test-dev-demoTopic-15WGUVRCBMPAA:1290f550-9a8a-4e8f-a900-8f5f96dcddda\",\n \"MessageAttributes\" : {\n \"_datadog\" : {\"Type\":\"String\",\"Value\":\"{\\\"x-datadog-trace-id\\\":\\\"2776434475358637757\\\",\\\"x-datadog-parent-id\\\":\\\"4493917105238181843\\\",\\\"x-datadog-sampling-priority\\\":\\\"1\\\"}\"}\n }\n}", + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1639777618130", + "SenderId": "AIDAIOA2GYWSHW4E2VXIO", + "ApproximateFirstReceiveTimestamp": "1639777618132" + }, + "messageAttributes": {}, + "md5OfBody": "ee19d8b1377919239ad3fd5dabc33739", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:eu-west-1:601427279990:aj-js-library-test-dev-demo-queue", + "awsRegion": "eu-west-1" + } + ] +} From 6099fa80d0549328fc2be96c4e201002f56a15a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 8 Nov 2024 20:30:04 -0500 Subject: [PATCH 20/41] feat(bottlecap): add S3 inferred spans (#441) * add `S3Event` * add `s3_event.json` * add `S3Record` into 
`span_inferrer.rs` --- .../src/lifecycle/invocation/span_inferrer.rs | 8 + .../src/lifecycle/invocation/triggers/mod.rs | 1 + .../lifecycle/invocation/triggers/s3_event.rs | 240 ++++++++++++++++++ bottlecap/tests/payloads/s3_event.json | 38 +++ 4 files changed, 287 insertions(+) create mode 100644 bottlecap/src/lifecycle/invocation/triggers/s3_event.rs create mode 100644 bottlecap/tests/payloads/s3_event.json diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 6c84cbe70..de2e3e31c 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -16,6 +16,8 @@ use crate::lifecycle::invocation::triggers::{ }; use crate::tags::lambda::tags::{INIT_TYPE, SNAP_START_VALUE}; +use super::triggers::s3_event::S3Record; + pub struct SpanInferrer { pub inferred_span: Option, pub wrapped_inferred_span: Option, @@ -102,6 +104,12 @@ impl SpanInferrer { if let Some(t) = SnsRecord::new(payload_value.clone()) { t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); + } + } else if S3Record::is_match(payload_value) { + if let Some(t) = S3Record::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); } } else { diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index 6edbe5c07..66995999f 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -7,6 +7,7 @@ use serde_json::Value; pub mod api_gateway_http_event; pub mod api_gateway_rest_event; +pub mod s3_event; pub mod sns_event; pub mod sqs_event; diff --git a/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs b/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs new file mode 100644 index 000000000..1e7fe5beb --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs @@ -0,0 +1,240 @@ +use 
std::collections::HashMap; + +use chrono::{DateTime, Utc}; +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Event { + #[serde(rename = "Records")] + pub records: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Record { + #[serde(rename = "eventSource")] + pub event_source: String, + #[serde(rename = "eventTime")] + pub event_time: DateTime, + #[serde(rename = "eventName")] + pub event_name: String, + pub s3: S3Entity, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Entity { + pub bucket: S3Bucket, + pub object: S3Object, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Bucket { + pub name: String, + pub arn: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Object { + pub key: String, + pub size: i64, + #[serde(rename = "eTag")] + pub e_tag: String, +} + +impl Trigger for S3Record { + fn new(payload: serde_json::Value) -> Option { + let records = payload.get("Records").and_then(Value::as_array); + match records { + Some(records) => match serde_json::from_value::(records[0].clone()) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize S3 Record: {e}"); + None + } + }, + None => None, + } + } + + fn is_match(payload: &serde_json::Value) -> bool { + if let Some(first_record) = payload + .get("Records") + .and_then(Value::as_array) + .and_then(|r| r.first()) + .take() + { + first_record.get("s3").is_some() + } else { + false + } + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span) { + debug!("Enriching an InferredSpan span with S3 event"); + let bucket_name = self.s3.bucket.name.clone(); + let start_time = 
self + .event_time + .timestamp_nanos_opt() + .unwrap_or((self.event_time.timestamp_millis() as f64 * MS_TO_NS) as i64); + // todo: service mapping + let service_name = "s3"; + + span.name = String::from("aws.s3"); + span.service = service_name.to_string(); + span.resource.clone_from(&bucket_name); + span.r#type = String::from("web"); + span.start = start_time; + span.meta.extend(HashMap::from([ + ("operation_name".to_string(), String::from("aws.s3")), + ("event_name".to_string(), self.event_name.clone()), + ("bucketname".to_string(), bucket_name), + ("bucket_arn".to_string(), self.s3.bucket.arn.clone()), + ("object_key".to_string(), self.s3.object.key.clone()), + ("object_size".to_string(), self.s3.object.size.to_string()), + ("object_etag".to_string(), self.s3.object.e_tag.clone()), + ])); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "s3".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + self.event_source.clone() + } + + fn get_carrier(&self) -> HashMap { + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = S3Record::new(payload).expect("Failed to deserialize into Record"); + + let expected = S3Record { + event_source: String::from("aws:s3:sample:event:source"), + event_time: DateTime::parse_from_rfc3339("2023-01-07T00:00:00.000Z") + .unwrap() + .with_timezone(&Utc), + event_name: String::from("ObjectCreated:Put"), + s3: S3Entity { + bucket: S3Bucket { + name: String::from("example-bucket"), + arn: String::from("arn:aws:s3:::example-bucket"), + }, + object: S3Object { + key: String::from("test/key"), + size: 1024, + e_tag: 
String::from("0123456789abcdef0123456789abcdef"), + }, + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize S3Record"); + + assert!(S3Record::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!S3Record::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize S3Record"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.s3"); + assert_eq!(span.service, "s3"); + assert_eq!(span.resource, "example-bucket"); + assert_eq!(span.r#type, "web"); + + assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), "aws.s3".to_string()), + ("event_name".to_string(), "ObjectCreated:Put".to_string()), + ("bucketname".to_string(), "example-bucket".to_string()), + ( + "bucket_arn".to_string(), + "arn:aws:s3:::example-bucket".to_string() + ), + ("object_key".to_string(), "test/key".to_string()), + ("object_size".to_string(), "1024".to_string()), + ( + "object_etag".to_string(), + "0123456789abcdef0123456789abcdef".to_string() + ) + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize S3Record"); + let tags = event.get_tags(); + + let expected = HashMap::from([( + "function_trigger.event_source".to_string(), + "s3".to_string(), + )]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = 
read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize S3Record"); + assert_eq!(event.get_arn("us-east-1"), "aws:s3:sample:event:source"); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize SqsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::new(); + + assert_eq!(carrier, expected); + } +} diff --git a/bottlecap/tests/payloads/s3_event.json b/bottlecap/tests/payloads/s3_event.json new file mode 100644 index 000000000..031dc8c3a --- /dev/null +++ b/bottlecap/tests/payloads/s3_event.json @@ -0,0 +1,38 @@ +{ + "Records": [ + { + "eventVersion": "2.0", + "eventSource": "aws:s3:sample:event:source", + "awsRegion": "us-east-1", + "eventTime": "2023-01-07T00:00:00.000Z", + "eventName": "ObjectCreated:Put", + "userIdentity": { + "principalId": "EXAMPLE" + }, + "requestParameters": { + "sourceIPAddress": "127.0.0.1" + }, + "responseElements": { + "x-amz-request-id": "EXAMPLE123456789", + "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH" + }, + "s3": { + "s3SchemaVersion": "1.0", + "configurationId": "testConfigRule", + "bucket": { + "name": "example-bucket", + "ownerIdentity": { + "principalId": "EXAMPLE" + }, + "arn": "arn:aws:s3:::example-bucket" + }, + "object": { + "key": "test/key", + "size": 1024, + "eTag": "0123456789abcdef0123456789abcdef", + "sequencer": "0A1B2C3D4E5F678901" + } + } + } + ] +} From 98491a579756c3359769b57ef58ac057ba61b726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 8 Nov 2024 21:29:10 -0500 Subject: [PATCH 21/41] feat(bottlecap): add DynamoDB inferred spans (#442) * add `S_TO_NS` * add 
`DynamoDbEvent` * use `DynamoDbEvent` in `SpanInferrer` * update to parse `approximate_creation_date_time` as `f64` --- .../src/lifecycle/invocation/processor.rs | 1 + .../src/lifecycle/invocation/span_inferrer.rs | 7 + .../invocation/triggers/dynamodb_event.rs | 240 ++++++++++++++++++ .../src/lifecycle/invocation/triggers/mod.rs | 1 + bottlecap/tests/payloads/dynamodb_event.json | 93 +++++++ 5 files changed, 342 insertions(+) create mode 100644 bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs create mode 100644 bottlecap/tests/payloads/dynamodb_event.json diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index e37789202..471c00f69 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -25,6 +25,7 @@ use crate::{ }; pub const MS_TO_NS: f64 = 1_000_000.0; +pub const S_TO_NS: f64 = 1_000_000_000.0; pub struct Processor { pub context_buffer: ContextBuffer, diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index de2e3e31c..090292599 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -10,6 +10,7 @@ use crate::config::AwsConfig; use crate::lifecycle::invocation::triggers::{ api_gateway_http_event::APIGatewayHttpEvent, api_gateway_rest_event::APIGatewayRestEvent, + dynamodb_event::DynamoDbRecord, sns_event::{SnsEntity, SnsRecord}, sqs_event::SqsRecord, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, @@ -104,6 +105,12 @@ impl SpanInferrer { if let Some(t) = SnsRecord::new(payload_value.clone()) { t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); + } + } else if DynamoDbRecord::is_match(payload_value) { + if let Some(t) = DynamoDbRecord::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); } } else if S3Record::is_match(payload_value) { 
diff --git a/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs b/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs new file mode 100644 index 000000000..46c20fe27 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs @@ -0,0 +1,240 @@ +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::S_TO_NS, + triggers::{Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct DynamoDbEvent { + #[serde(rename = "Records")] + pub records: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct DynamoDbRecord { + #[serde(rename = "dynamodb")] + pub dynamodb: DynamoDbEntity, + #[serde(rename = "eventID")] + pub event_id: String, + #[serde(rename = "eventName")] + pub event_name: String, + #[serde(rename = "eventVersion")] + pub event_version: String, + #[serde(rename = "eventSourceARN")] + pub event_source_arn: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct DynamoDbEntity { + #[serde(rename = "ApproximateCreationDateTime")] + pub approximate_creation_date_time: f64, + #[serde(rename = "SizeBytes")] + pub size_bytes: i64, + #[serde(rename = "StreamViewType")] + pub stream_view_type: String, +} + +impl Trigger for DynamoDbRecord { + fn new(payload: Value) -> Option + where + Self: Sized, + { + let records = payload.get("Records").and_then(Value::as_array); + match records { + Some(records) => match serde_json::from_value::(records[0].clone()) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize DynamoDB Record: {e}"); + None + } + }, + None => None, + } + } + + fn is_match(payload: &Value) -> bool + where + Self: Sized, + { + if let Some(first_record) = payload + .get("Records") + .and_then(Value::as_array) + .and_then(|r| 
r.first()) + .take() + { + first_record.get("dynamodb").is_some() + } else { + false + } + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span) { + debug!("Enriching an Inferred Span for a DynamoDB event"); + let table_name = self.event_source_arn.split('/').nth(1).unwrap_or_default(); + let resource = format!("{} {}", self.event_name.clone(), table_name); + + let start_time = (self.dynamodb.approximate_creation_date_time * S_TO_NS) as i64; + // todo: service mapping and peer service + let service_name = "dynamodb"; + + span.name = String::from("aws.dynamodb"); + span.service = service_name.to_string(); + span.resource = resource; + span.r#type = String::from("web"); + span.start = start_time; + span.meta.extend(HashMap::from([ + ("operation_name".to_string(), String::from("aws.dynamodb")), + ("event_id".to_string(), self.event_id.clone()), + ("event_name".to_string(), self.event_name.clone()), + ("event_version".to_string(), self.event_version.clone()), + ( + "event_source_arn".to_string(), + self.event_source_arn.clone(), + ), + ( + "size_bytes".to_string(), + self.dynamodb.size_bytes.to_string(), + ), + ( + "stream_view_type".to_string(), + self.dynamodb.stream_view_type.clone(), + ), + ("table_name".to_string(), table_name.to_string()), + ])); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "dynamodb".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + self.event_source_arn.clone() + } + + fn get_carrier(&self) -> HashMap { + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = DynamoDbRecord::new(payload).expect("Failed to 
deserialize into Record"); + + let expected = DynamoDbRecord { + dynamodb: DynamoDbEntity { + approximate_creation_date_time: 1428537600.0, + size_bytes: 26, + stream_view_type: String::from("NEW_AND_OLD_IMAGES"), + }, + event_id: String::from("c4ca4238a0b923820dcc509a6f75849b"), + event_name: String::from("INSERT"), + event_version: String::from("1.1"), + event_source_arn: String::from("arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899"), + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize DynamoDbRecord"); + + assert!(DynamoDbRecord::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!DynamoDbRecord::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.dynamodb"); + assert_eq!(span.service, "dynamodb"); + assert_eq!(span.resource, "INSERT ExampleTableWithStream"); + assert_eq!(span.r#type, "web"); + + assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), "aws.dynamodb".to_string()), + ("event_id".to_string(), "c4ca4238a0b923820dcc509a6f75849b".to_string()), + ("event_name".to_string(), "INSERT".to_string()), + ("event_version".to_string(), "1.1".to_string()), + ( + "event_source_arn".to_string(), + "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899".to_string() + ), + ("size_bytes".to_string(), 
"26".to_string()), + ("stream_view_type".to_string(), "NEW_AND_OLD_IMAGES".to_string()), + ("table_name".to_string(), "ExampleTableWithStream".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + let tags = event.get_tags(); + + let expected = HashMap::from([( + "function_trigger.event_source".to_string(), + "dynamodb".to_string(), + )]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899" + ); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::new(); + + assert_eq!(carrier, expected); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index 66995999f..8cfb2e578 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -7,6 +7,7 @@ use serde_json::Value; pub mod api_gateway_http_event; pub mod api_gateway_rest_event; +pub mod dynamodb_event; pub mod s3_event; pub mod sns_event; pub mod sqs_event; diff --git a/bottlecap/tests/payloads/dynamodb_event.json b/bottlecap/tests/payloads/dynamodb_event.json new file mode 100644 
index 000000000..df0cf7ea4 --- /dev/null +++ b/bottlecap/tests/payloads/dynamodb_event.json @@ -0,0 +1,93 @@ +{ + "Records": [ + { + "eventID": "c4ca4238a0b923820dcc509a6f75849b", + "eventName": "INSERT", + "eventVersion": "1.1", + "eventSource": "aws:dynamodb", + "awsRegion": "us-east-1", + "dynamodb": { + "Keys": { + "Id": { + "N": "101" + } + }, + "NewImage": { + "Message": { + "S": "New item!" + }, + "Id": { + "N": "101" + } + }, + "ApproximateCreationDateTime": 1428537600, + "SequenceNumber": "4421584500000000017450439091", + "SizeBytes": 26, + "StreamViewType": "NEW_AND_OLD_IMAGES" + }, + "eventSourceARN": "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899" + }, + { + "eventID": "c81e728d9d4c2f636f067f89cc14862c", + "eventName": "MODIFY", + "eventVersion": "1.1", + "eventSource": "aws:dynamodb", + "awsRegion": "us-east-1", + "dynamodb": { + "Keys": { + "Id": { + "N": "101" + } + }, + "NewImage": { + "Message": { + "S": "This item has changed" + }, + "Id": { + "N": "101" + } + }, + "OldImage": { + "Message": { + "S": "New item!" 
+ }, + "Id": { + "N": "101" + } + }, + "ApproximateCreationDateTime": 1428537600, + "SequenceNumber": "4421584500000000017450439092", + "SizeBytes": 59, + "StreamViewType": "NEW_AND_OLD_IMAGES" + }, + "eventSourceARN": "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899" + }, + { + "eventID": "eccbc87e4b5ce2fe28308fd9f2a7baf3", + "eventName": "REMOVE", + "eventVersion": "1.1", + "eventSource": "aws:dynamodb", + "awsRegion": "us-east-1", + "dynamodb": { + "Keys": { + "Id": { + "N": "101" + } + }, + "OldImage": { + "Message": { + "S": "This item has changed" + }, + "Id": { + "N": "101" + } + }, + "ApproximateCreationDateTime": 1428537600, + "SequenceNumber": "4421584500000000017450439093", + "SizeBytes": 38, + "StreamViewType": "NEW_AND_OLD_IMAGES" + }, + "eventSourceARN": "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899" + } + ] +} From 93472d288c2b0f10b71669c32b4ec065da034027 Mon Sep 17 00:00:00 2001 From: alexgallotta <5581237+alexgallotta@users.noreply.github.com> Date: Mon, 11 Nov 2024 11:20:21 -0500 Subject: [PATCH 22/41] feat(bottlecap): add EventBridge inferred spans (#436) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add eventbridge event * fix test path * add comments with code ref and fix metadata api-gateway * fix error message * clean import * make build faster using host network * fix conflicts and tests * fix test conflicts * resolve merge conflicts * minor changes * add missing unit test * update events for testing * account for millisecond resolution and resource name * fix unit tests * remove `network` tag for runners --------- Co-authored-by: jordan gonzález <30836115+duncanista@users.noreply.github.com> --- .../src/lifecycle/invocation/span_inferrer.rs | 7 + .../invocation/triggers/event_bridge_event.rs | 242 ++++++++++++++++++ .../src/lifecycle/invocation/triggers/mod.rs | 7 +- 
bottlecap/src/proc/mod.rs | 13 +- .../tests/payloads/eventbridge_event.json | 21 ++ .../eventbridge_no_resource_name_event.json | 19 ++ .../eventbridge_no_timestamp_event.json | 19 ++ .../tests/payloads/eventbridge_span.json | 16 ++ 8 files changed, 340 insertions(+), 4 deletions(-) create mode 100644 bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs create mode 100644 bottlecap/tests/payloads/eventbridge_event.json create mode 100644 bottlecap/tests/payloads/eventbridge_no_resource_name_event.json create mode 100644 bottlecap/tests/payloads/eventbridge_no_timestamp_event.json create mode 100644 bottlecap/tests/payloads/eventbridge_span.json diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 090292599..edc3253c1 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -11,6 +11,7 @@ use crate::lifecycle::invocation::triggers::{ api_gateway_http_event::APIGatewayHttpEvent, api_gateway_rest_event::APIGatewayRestEvent, dynamodb_event::DynamoDbRecord, + event_bridge_event::EventBridgeEvent, sns_event::{SnsEntity, SnsRecord}, sqs_event::SqsRecord, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, @@ -117,6 +118,12 @@ impl SpanInferrer { if let Some(t) = S3Record::new(payload_value.clone()) { t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); + } + } else if EventBridgeEvent::is_match(payload_value) { + if let Some(t) = EventBridgeEvent::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); } } else { diff --git a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs new file mode 100644 index 000000000..f51b111c6 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs @@ -0,0 +1,242 @@ +use chrono::{DateTime, Utc}; +use 
datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::{MS_TO_NS, S_TO_NS}, + triggers::{Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, +}; + +const DATADOG_START_TIME_KEY: &str = "x-datadog-start-time"; +const DATADOG_RESOURCE_NAME_KEY: &str = "x-datadog-resource-name"; + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +pub struct EventBridgeEvent { + pub id: String, + pub version: String, + pub account: String, + pub time: DateTime, + pub region: String, + pub resources: Vec, + pub source: String, + #[serde(rename = "detail-type")] + pub detail_type: String, + pub detail: Value, + #[serde(rename = "replay-name")] + pub replay_name: Option, +} + +impl Trigger for EventBridgeEvent { + fn new(payload: Value) -> Option { + match serde_json::from_value(payload) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize EventBridge Event: {}", e); + None + } + } + } + + fn is_match(payload: &Value) -> bool { + payload.get("detail-type").is_some() + && payload + .get("source") + .and_then(Value::as_str) + .map_or(false, |s| s != "aws.events") + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span) { + // EventBridge events have a timestamp resolution in seconds + let start_time_seconds = self + .time + .timestamp_nanos_opt() + .unwrap_or((self.time.timestamp_millis() as f64 * S_TO_NS) as i64); + + let carrier = self.get_carrier(); + let resource_name = carrier + .get(DATADOG_RESOURCE_NAME_KEY) + .unwrap_or(&self.source) + .clone(); + let start_time = carrier + .get(DATADOG_START_TIME_KEY) + .and_then(|s| s.parse::().ok()) + .map_or(start_time_seconds, |s| (s * MS_TO_NS) as i64); + + // todo: service mapping and peer service + let service_name = "eventbridge"; + + span.name = String::from("aws.eventbridge"); + span.service = service_name.to_string(); + 
span.resource = resource_name; + span.r#type = String::from("web"); + span.start = start_time; + span.meta.extend(HashMap::from([ + ("operation_name".to_string(), "aws.eventbridge".to_string()), + ("detail_type".to_string(), self.detail_type.clone()), + ])); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "eventbridge".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + self.source.clone() + } + + fn get_carrier(&self) -> HashMap { + if let Ok(detail) = serde_json::from_value::>(self.detail.clone()) { + if let Some(carrier) = detail.get(DATADOG_CARRIER_KEY) { + return serde_json::from_value(carrier.clone()).unwrap_or_default(); + } + } + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = + EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); + + let expected = EventBridgeEvent { + id: "bd3c8258-8d30-007c-2562-64715b2d0ea8".to_string(), + version: "0".to_string(), + account: "601427279990".to_string(), + time: DateTime::parse_from_rfc3339("2024-11-09T08:22:15Z") + .expect("Failed to parse time") + .with_timezone(&Utc), + region: "eu-west-1".to_string(), + resources: vec![], + source: "my.event".to_string(), + detail_type: "UserSignUp".to_string(), + detail: serde_json::json!({ + "hello": "there", + "_datadog": { + "x-datadog-trace-id": "5827606813695714842", + "x-datadog-parent-id": "4726693487091824375", + "x-datadog-sampled": "1", + "x-datadog-sampling-priority": "1", + "x-datadog-resource-name": "testBus", + "x-datadog-start-time": "1731183820135" + } + }), + replay_name: None, + }; + + assert_eq!(result, expected); + } + + #[test] 
+ fn test_is_match() { + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize EventBridgeEvent"); + + assert!(EventBridgeEvent::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize EventBridgeEvent"); + assert!(!EventBridgeEvent::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); + + let mut span = Span::default(); + event.enrich_span(&mut span); + + let expected = serde_json::from_str(&read_json_file("eventbridge_span.json")) + .expect("Failed to deserialize into Span"); + assert_eq!(span, expected); + } + + #[test] + fn test_enrich_span_no_resource_name() { + let json = read_json_file("eventbridge_no_resource_name_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); + + let mut span = Span::default(); + event.enrich_span(&mut span); + + assert_eq!(span.resource, "my.event"); + } + + #[test] + fn test_enrich_span_no_timestamp() { + let json = read_json_file("eventbridge_no_timestamp_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); + + let mut span = Span::default(); + event.enrich_span(&mut span); + + assert_eq!(span.resource, "testBus"); + // Seconds resolution + assert_eq!(span.start, 1731140535000000000); + } + + #[test] + fn test_get_arn() { + let json = 
read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = EventBridgeEvent::new(payload).expect("Failed to deserialize EventBridgeEvent"); + assert_eq!(event.get_arn("us-east-1"), "my.event"); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + EventBridgeEvent::new(payload).expect("Failed to deserialize EventBridge Event"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "5827606813695714842".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "4726693487091824375".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-sampled".to_string(), "1".to_string()), + ("x-datadog-resource-name".to_string(), "testBus".to_string()), + ( + "x-datadog-start-time".to_string(), + "1731183820135".to_string(), + ), + ]); + + assert_eq!(carrier, expected); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index 8cfb2e578..e0d347f08 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -8,6 +8,7 @@ use serde_json::Value; pub mod api_gateway_http_event; pub mod api_gateway_rest_event; pub mod dynamodb_event; +pub mod event_bridge_event; pub mod s3_event; pub mod sns_event; pub mod sqs_event; @@ -67,9 +68,13 @@ where #[cfg(test)] pub mod test_utils { use std::fs; + use std::path::PathBuf; #[must_use] pub fn read_json_file(file_name: &str) -> String { - fs::read_to_string(format!("tests/payloads/{file_name}")).expect("Failed to read file") + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("tests/payloads"); + path.push(file_name); + fs::read_to_string(path).expect("Failed to 
read file") } } diff --git a/bottlecap/src/proc/mod.rs b/bottlecap/src/proc/mod.rs index 23d6680ec..3dfa1a67f 100644 --- a/bottlecap/src/proc/mod.rs +++ b/bottlecap/src/proc/mod.rs @@ -184,12 +184,19 @@ fn get_uptime_from_path(path: &str) -> Result { #[allow(clippy::unwrap_used)] mod tests { use super::*; + use std::path::PathBuf; + + fn path_from_root(file: &str) -> String { + let mut safe_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + safe_path.push(file); + safe_path.to_str().unwrap().to_string() + } #[test] #[allow(clippy::float_cmp)] fn test_get_network_data() { let path = "./tests/proc/net/valid_dev"; - let network_data_result = get_network_data_from_path(path); + let network_data_result = get_network_data_from_path(path_from_root(path).as_str()); assert!(network_data_result.is_ok()); let network_data_result = network_data_result.unwrap(); assert_eq!(network_data_result.rx_bytes, 180.0); @@ -216,7 +223,7 @@ mod tests { #[allow(clippy::float_cmp)] fn test_get_cpu_data() { let path = "./tests/proc/stat/valid_stat"; - let cpu_data_result = get_cpu_data_from_path(path); + let cpu_data_result = get_cpu_data_from_path(path_from_root(path).as_str()); assert!(cpu_data_result.is_ok()); let cpu_data = cpu_data_result.unwrap(); assert_eq!(cpu_data.total_user_time_ms, 23370.0); @@ -267,7 +274,7 @@ mod tests { #[allow(clippy::float_cmp)] fn test_get_uptime_data() { let path = "./tests/proc/uptime/valid_uptime"; - let uptime_data_result = get_uptime_from_path(path); + let uptime_data_result = get_uptime_from_path(path_from_root(path).as_str()); assert!(uptime_data_result.is_ok()); let uptime_data = uptime_data_result.unwrap(); assert_eq!(uptime_data, 3_213_103_123_000.0); diff --git a/bottlecap/tests/payloads/eventbridge_event.json b/bottlecap/tests/payloads/eventbridge_event.json new file mode 100644 index 000000000..8c9d91d9e --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_event.json @@ -0,0 +1,21 @@ +{ + "version": "0", + "id": 
"bd3c8258-8d30-007c-2562-64715b2d0ea8", + "detail-type": "UserSignUp", + "source": "my.event", + "account": "601427279990", + "time": "2024-11-09T08:22:15Z", + "region": "eu-west-1", + "resources": [], + "detail": { + "hello": "there", + "_datadog": { + "x-datadog-trace-id": "5827606813695714842", + "x-datadog-parent-id": "4726693487091824375", + "x-datadog-sampled": "1", + "x-datadog-sampling-priority": "1", + "x-datadog-resource-name": "testBus", + "x-datadog-start-time": "1731183820135" + } + } +} diff --git a/bottlecap/tests/payloads/eventbridge_no_resource_name_event.json b/bottlecap/tests/payloads/eventbridge_no_resource_name_event.json new file mode 100644 index 000000000..778e40628 --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_no_resource_name_event.json @@ -0,0 +1,19 @@ +{ + "version": "0", + "id": "bd3c8258-8d30-007c-2562-64715b2d0ea8", + "detail-type": "UserSignUp", + "source": "my.event", + "account": "601427279990", + "time": "2024-11-09T08:22:15Z", + "region": "eu-west-1", + "resources": [], + "detail": { + "hello": "there", + "_datadog": { + "x-datadog-trace-id": "5827606813695714842", + "x-datadog-parent-id": "4726693487091824375", + "x-datadog-sampling-priority": "1", + "x-datadog-start-time": "1731183820135" + } + } +} diff --git a/bottlecap/tests/payloads/eventbridge_no_timestamp_event.json b/bottlecap/tests/payloads/eventbridge_no_timestamp_event.json new file mode 100644 index 000000000..d5e8d9c6c --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_no_timestamp_event.json @@ -0,0 +1,19 @@ +{ + "version": "0", + "id": "bd3c8258-8d30-007c-2562-64715b2d0ea8", + "detail-type": "UserSignUp", + "source": "my.event", + "account": "601427279990", + "time": "2024-11-09T08:22:15Z", + "region": "eu-west-1", + "resources": [], + "detail": { + "hello": "there", + "_datadog": { + "x-datadog-trace-id": "5827606813695714842", + "x-datadog-parent-id": "4726693487091824375", + "x-datadog-sampling-priority": "1", + "x-datadog-resource-name": 
"testBus" + } + } +} diff --git a/bottlecap/tests/payloads/eventbridge_span.json b/bottlecap/tests/payloads/eventbridge_span.json new file mode 100644 index 000000000..0515abd69 --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_span.json @@ -0,0 +1,16 @@ +{ + "service": "eventbridge", + "name": "aws.eventbridge", + "resource": "testBus", + "trace_id": 0, + "span_id": 0, + "parent_id": 0, + "start": 1731183820135000064, + "duration": 0, + "meta": { + "operation_name": "aws.eventbridge", + "detail_type": "UserSignUp" + }, + "metrics": {}, + "type": "web" +} \ No newline at end of file From 7230ce3aa651b4400cafe8fc08372d563151d95d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Mon, 11 Nov 2024 15:24:14 -0500 Subject: [PATCH 23/41] chore(bottlecap): move `EnhancedMetrics` to live in `InvocationProcessor` (#446) * move `EnhancedMetrics` to live in `InvocationProcessor` * rename field to `enhanced_metrics_enabled` --- bottlecap/src/bin/bottlecap/main.rs | 50 ++++-------- .../src/lifecycle/invocation/processor.rs | 79 ++++++++++++++----- 2 files changed, 72 insertions(+), 57 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 21f6c777a..6856bd8a3 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -23,13 +23,12 @@ use bottlecap::{ agent::LogsAgent, flusher::{build_fqdn_logs, Flusher as LogsFlusher}, }, - metrics::enhanced::lambda::Lambda as enhanced_metrics, secrets::decrypt, tags::{lambda, provider::Provider as TagProvider}, telemetry::{ self, client::TelemetryApiClient, - events::{Status, TelemetryEvent, TelemetryRecord}, + events::{TelemetryEvent, TelemetryRecord}, listener::TelemetryListener, }, traces::{ @@ -296,10 +295,12 @@ async fn extension_loop_active( buffer: Arc::new(TokioMutex::new(Vec::new())), }); + // Lifecycle Invocation Processor let invocation_processor = 
Arc::new(TokioMutex::new(InvocationProcessor::new( Arc::clone(&tags_provider), Arc::clone(config), aws_config, + Arc::clone(&metrics_aggr), ))); let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { obfuscation_config: Arc::new( @@ -350,8 +351,6 @@ async fn extension_loop_active( } }); - let lambda_enhanced_metrics = - enhanced_metrics::new(Arc::clone(&metrics_aggr), Arc::clone(config)); let dogstatsd_cancel_token = start_dogstatsd(&metrics_aggr).await; let telemetry_listener_cancel_token = @@ -375,7 +374,6 @@ async fn extension_loop_active( "Invoke event {}; deadline: {}, invoked_function_arn: {}", request_id, deadline_ms, invoked_function_arn ); - lambda_enhanced_metrics.increment_invocation_metric(); let mut p = invocation_processor.lock().await; p.on_invoke_event(request_id); drop(p); @@ -416,8 +414,9 @@ async fn extension_loop_active( metrics, } => { debug!("Platform init report for initialization_type: {:?} with phase: {:?} and metrics: {:?}", initialization_type, phase, metrics); - lambda_enhanced_metrics - .set_init_duration_metric(metrics.duration_ms); + let mut p = invocation_processor.lock().await; + p.on_platform_init_report(metrics.duration_ms); + drop(p); } TelemetryRecord::PlatformRuntimeDone { request_id, @@ -425,38 +424,25 @@ async fn extension_loop_active( metrics, .. 
} => { + debug!( + "Runtime done for request_id: {:?} with status: {:?}", + request_id, status + ); + let mut p = invocation_processor.lock().await; - let mut enhanced_metric_data = None; if let Some(metrics) = metrics { - enhanced_metric_data = p.on_platform_runtime_done( + p.on_platform_runtime_done( &request_id, metrics.duration_ms, + status, config.clone(), tags_provider.clone(), trace_processor.clone(), trace_agent_tx.clone() ).await; - lambda_enhanced_metrics - .set_runtime_duration_metric(metrics.duration_ms); } drop(p); - if status != Status::Success { - lambda_enhanced_metrics.increment_errors_metric(); - if status == Status::Timeout { - lambda_enhanced_metrics.increment_timeout_metric(); - } - } - debug!( - "Runtime done for request_id: {:?} with status: {:?}", - request_id, status - ); - - // set cpu utilization metrics here to avoid accounting for extra idle time - if let Some(offsets) = enhanced_metric_data { - lambda_enhanced_metrics.set_cpu_utilization_enhanced_metrics(offsets.cpu_offset, offsets.uptime_offset); - } - // TODO(astuyve) it'll be easy to // pass the invocation deadline to // flush tasks here, so they can @@ -482,16 +468,8 @@ async fn extension_loop_active( "Platform report for request_id: {:?} with status: {:?}", request_id, status ); - lambda_enhanced_metrics.set_report_log_metrics(&metrics); let mut p = invocation_processor.lock().await; - let (post_runtime_duration_ms, enhanced_metric_data) = p.on_platform_report(&request_id, metrics.duration_ms); - if let Some(duration) = post_runtime_duration_ms { - lambda_enhanced_metrics.set_post_runtime_duration_metric(duration); - } - if let Some(offsets) = enhanced_metric_data { - lambda_enhanced_metrics.set_network_enhanced_metrics(offsets.network_offset); - lambda_enhanced_metrics.set_cpu_time_enhanced_metrics(offsets.cpu_offset); - } + p.on_platform_report(&request_id, metrics); drop(p); if shutdown { diff --git a/bottlecap/src/lifecycle/invocation/processor.rs 
b/bottlecap/src/lifecycle/invocation/processor.rs index 471c00f69..467519e37 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -1,12 +1,13 @@ use std::{ collections::HashMap, - sync::Arc, + sync::{Arc, Mutex}, time::{SystemTime, UNIX_EPOCH}, }; use chrono::{DateTime, Utc}; use datadog_trace_protobuf::pb::Span; use datadog_trace_utils::{send_data::SendData, tracer_header_tags}; +use dogstatsd::aggregator::Aggregator as MetricsAggregator; use serde_json::{json, Value}; use tokio::sync::mpsc::Sender; use tracing::debug; @@ -14,9 +15,10 @@ use tracing::debug; use crate::{ config::{self, AwsConfig}, lifecycle::invocation::{context::ContextBuffer, span_inferrer::SpanInferrer}, - metrics::enhanced::lambda::EnhancedMetricData, + metrics::enhanced::lambda::{EnhancedMetricData, Lambda as EnhancedMetrics}, proc::{self, CPUData, NetworkData}, tags::provider, + telemetry::events::{ReportMetrics, Status}, traces::{ context::SpanContext, propagation::{DatadogCompositePropagator, Propagator}, @@ -34,9 +36,10 @@ pub struct Processor { pub extracted_span_context: Option, // Used to extract the trace context from inferred span, headers, or payload propagator: DatadogCompositePropagator, + enhanced_metrics: EnhancedMetrics, aws_config: AwsConfig, tracer_detected: bool, - collect_enhanced_data: bool, + enhanced_metrics_enabled: bool, } impl Processor { @@ -45,6 +48,7 @@ impl Processor { tags_provider: Arc, config: Arc, aws_config: &AwsConfig, + metrics_aggregator: Arc>, ) -> Self { let service = config.service.clone().unwrap_or("aws.lambda".to_string()); let resource = tags_provider @@ -74,9 +78,10 @@ impl Processor { }, extracted_span_context: None, propagator, + enhanced_metrics: EnhancedMetrics::new(metrics_aggregator, Arc::clone(&config)), aws_config: aws_config.clone(), tracer_detected: false, - collect_enhanced_data: config.enhanced_metrics, + enhanced_metrics_enabled: config.enhanced_metrics, } } @@ -84,7 +89,7 
@@ impl Processor { /// pub fn on_invoke_event(&mut self, request_id: String) { self.context_buffer.create_context(request_id.clone()); - if self.collect_enhanced_data { + if self.enhanced_metrics_enabled { let network_offset: Option = proc::get_network_data().ok(); let cpu_offset: Option = proc::get_cpu_data().ok(); let uptime_offset: Option = proc::get_uptime().ok(); @@ -96,6 +101,15 @@ impl Processor { self.context_buffer .add_enhanced_metric_data(&request_id, enhanced_metric_offsets); } + + // Increment the invocation metric + self.enhanced_metrics.increment_invocation_metric(); + } + + /// Given the duration of the platform init report, set the init duration metric. + /// + pub fn on_platform_init_report(&mut self, duration_ms: f64) { + self.enhanced_metrics.set_init_duration_metric(duration_ms); } /// Given a `request_id` and the time of the platform start, add the start time to the context buffer. @@ -113,20 +127,35 @@ impl Processor { self.span.start = start_time; } + #[allow(clippy::too_many_arguments)] #[allow(clippy::cast_possible_truncation)] pub async fn on_platform_runtime_done( &mut self, request_id: &String, duration_ms: f64, + status: Status, config: Arc, tags_provider: Arc, trace_processor: Arc, trace_agent_tx: Sender, - ) -> Option { + ) { self.context_buffer .add_runtime_duration(request_id, duration_ms); - let mut enhanced_metric_data: Option = None; + // Set the runtime duration metric + self.enhanced_metrics + .set_runtime_duration_metric(duration_ms); + + if status != Status::Success { + // Increment the error metric + self.enhanced_metrics.increment_errors_metric(); + + // Increment the error type metric + if status == Status::Timeout { + self.enhanced_metrics.increment_timeout_metric(); + } + } + if let Some(context) = self.context_buffer.get(request_id) { let span = &mut self.span; // `round` is intentionally meant to be a whole integer @@ -143,7 +172,12 @@ impl Processor { // - error.stack // - metrics tags (for asm) - 
enhanced_metric_data.clone_from(&context.enhanced_metric_data); + if let Some(offsets) = &context.enhanced_metric_data { + self.enhanced_metrics.set_cpu_utilization_enhanced_metrics( + offsets.cpu_offset.clone(), + offsets.uptime_offset, + ); + } } if let Some(trigger_tags) = self.inferrer.get_trigger_tags() { @@ -190,8 +224,6 @@ impl Processor { debug!("Failed to send invocation span to agent: {e}"); } } - - enhanced_metric_data } /// Given a `request_id` and the duration in milliseconds of the platform report, @@ -200,22 +232,27 @@ impl Processor { /// If the `request_id` is not found in the context buffer, return `None`. /// If the `runtime_duration_ms` hasn't been seen, return `None`. /// - pub fn on_platform_report( - &mut self, - request_id: &String, - duration_ms: f64, - ) -> (Option, Option) { - if let Some(context) = self.context_buffer.remove(request_id) { - let mut post_runtime_duration_ms: Option = None; + pub fn on_platform_report(&mut self, request_id: &String, metrics: ReportMetrics) { + // Set the report log metrics + self.enhanced_metrics.set_report_log_metrics(&metrics); + if let Some(context) = self.context_buffer.remove(request_id) { if context.runtime_duration_ms != 0.0 { - post_runtime_duration_ms = Some(duration_ms - context.runtime_duration_ms); + let post_runtime_duration_ms = metrics.duration_ms - context.runtime_duration_ms; + + // Set the post runtime duration metric + self.enhanced_metrics + .set_post_runtime_duration_metric(post_runtime_duration_ms); } - return (post_runtime_duration_ms, context.enhanced_metric_data); + // Set Network and CPU time metrics + if let Some(offsets) = context.enhanced_metric_data { + self.enhanced_metrics + .set_network_enhanced_metrics(offsets.network_offset); + self.enhanced_metrics + .set_cpu_time_enhanced_metrics(offsets.cpu_offset); + } } - - (None, None) } /// If this method is called, it means that we are operating in a Universally Instrumented From 56677ce0a6df2c49d1e2d9fbc82ffcbf8cdfd224 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Tue, 12 Nov 2024 10:45:09 -0500 Subject: [PATCH 24/41] feat(bottlecap): set error to invocation span (#444) * move `base64_to_string` to `lifecycle::invocation` module * set error on span from headers checks the headers to identify errors that should be attached to the invocation span and the inferred span * increment metrics on error * fmt * remove a todo --- bottlecap/src/lifecycle/invocation/mod.rs | 9 ++ .../src/lifecycle/invocation/processor.rs | 93 ++++++++++++++++--- .../src/lifecycle/invocation/span_inferrer.rs | 6 ++ .../src/lifecycle/invocation/triggers/mod.rs | 9 -- .../invocation/triggers/sns_event.rs | 10 +- bottlecap/src/lifecycle/listener.rs | 8 +- 6 files changed, 104 insertions(+), 31 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/mod.rs b/bottlecap/src/lifecycle/invocation/mod.rs index 39d0557dc..454cfa3bc 100644 --- a/bottlecap/src/lifecycle/invocation/mod.rs +++ b/bottlecap/src/lifecycle/invocation/mod.rs @@ -1,4 +1,13 @@ +use base64::{engine::general_purpose, DecodeError, Engine}; + pub mod context; pub mod processor; pub mod span_inferrer; pub mod triggers; + +pub fn base64_to_string(base64_string: &str) -> Result { + match general_purpose::STANDARD.decode(base64_string) { + Ok(bytes) => Ok(String::from_utf8_lossy(&bytes).to_string()), + Err(e) => Err(e), + } +} diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 467519e37..b9e82db44 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -14,7 +14,9 @@ use tracing::debug; use crate::{ config::{self, AwsConfig}, - lifecycle::invocation::{context::ContextBuffer, span_inferrer::SpanInferrer}, + lifecycle::invocation::{ + base64_to_string, context::ContextBuffer, span_inferrer::SpanInferrer, + }, metrics::enhanced::lambda::{EnhancedMetricData, Lambda 
as EnhancedMetrics}, proc::{self, CPUData, NetworkData}, tags::provider, @@ -29,6 +31,11 @@ use crate::{ pub const MS_TO_NS: f64 = 1_000_000.0; pub const S_TO_NS: f64 = 1_000_000_000.0; +pub const DATADOG_INVOCATION_ERROR_MESSAGE_KEY: &str = "x-datadog-invocation-error-msg"; +pub const DATADOG_INVOCATION_ERROR_TYPE_KEY: &str = "x-datadog-invocation-error-type"; +pub const DATADOG_INVOCATION_ERROR_STACK_KEY: &str = "x-datadog-invocation-error-stack"; +pub const DATADOG_INVOCATION_ERROR_KEY: &str = "x-datadog-invocation-error"; + pub struct Processor { pub context_buffer: ContextBuffer, inferrer: SpanInferrer, @@ -64,11 +71,11 @@ impl Processor { service, name: "aws.lambda".to_string(), resource, - trace_id: 0, // set later - span_id: 0, // maybe set later? - parent_id: 0, // set later - start: 0, // set later - duration: 0, // set later + trace_id: 0, + span_id: 0, + parent_id: 0, + start: 0, + duration: 0, error: 0, meta: HashMap::new(), metrics: HashMap::new(), @@ -167,9 +174,6 @@ impl Processor { // - language // - function.request - capture lambda payload // - function.response - // - error.msg - // - error.type - // - error.stack // - metrics tags (for asm) if let Some(offsets) = &context.enhanced_metric_data { @@ -328,15 +332,28 @@ impl Processor { headers: HashMap, status_code: Option, ) { - self.update_span_context(headers); - if self.inferrer.inferred_span.is_some() { - if let Some(status_code) = status_code { - self.inferrer.set_status_code(status_code); + if let Some(status_code) = status_code { + self.span + .meta + .insert("http.status_code".to_string(), status_code.clone()); + + if status_code.len() == 3 && status_code.starts_with('5') { + self.span.error = 1; } + + // If we have an inferred span, set the status code to it + self.inferrer.set_status_code(status_code); + } + + self.update_span_context_from_headers(&headers); + self.set_span_error_from_headers(headers); + + if self.span.error == 1 { + self.enhanced_metrics.increment_errors_metric(); } } 
- fn update_span_context(&mut self, headers: HashMap) { + fn update_span_context_from_headers(&mut self, headers: &HashMap) { // todo: fix this, code is a copy of the existing logic in Go, not accounting // when a 128 bit trace id exist let mut trace_id = 0; @@ -364,8 +381,56 @@ impl Processor { self.span.trace_id = trace_id; self.span.span_id = span_id; + // If no inferred span, set the parent id right away if self.inferrer.inferred_span.is_none() { self.span.parent_id = parent_id; } } + + /// Given end invocation headers, set error metadata, if present, to the current span. + /// + fn set_span_error_from_headers(&mut self, headers: HashMap) { + let message = headers.get(DATADOG_INVOCATION_ERROR_MESSAGE_KEY); + let r#type = headers.get(DATADOG_INVOCATION_ERROR_TYPE_KEY); + let stack = headers.get(DATADOG_INVOCATION_ERROR_STACK_KEY); + + let is_error = headers + .get(DATADOG_INVOCATION_ERROR_KEY) + .map_or(false, |v| v.to_lowercase() == "true") + || message.is_some() + || stack.is_some() + || r#type.is_some() + || self.span.error == 1; + if is_error { + self.span.error = 1; + + if let Some(m) = message { + self.span + .meta + .insert(String::from("error.msg"), m.to_string()); + } + + if let Some(t) = r#type { + self.span + .meta + .insert(String::from("error.type"), t.to_string()); + } + + if let Some(s) = stack { + let decoded_stack = match base64_to_string(s) { + Ok(decoded) => decoded, + Err(e) => { + debug!("Failed to decode error stack: {e}"); + s.to_string() + } + }; + + self.span + .meta + .insert(String::from("error.stack"), decoded_stack); + } + + // todo: handle timeout + } + } } diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index edc3253c1..cdd14d32d 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -187,6 +187,9 @@ impl SpanInferrer { ws.duration = duration; } + // Set error + ws.error = 
invocation_span.error; + ws.trace_id = invocation_span.trace_id; } @@ -201,6 +204,9 @@ impl SpanInferrer { s.duration = duration; } + // Set error + s.error = invocation_span.error; + s.trace_id = invocation_span.trace_id; } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index e0d347f08..dcc3f9b62 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -1,6 +1,5 @@ use std::{collections::HashMap, hash::BuildHasher}; -use base64::{engine::general_purpose, Engine}; use datadog_trace_protobuf::pb::Span; use serde::{ser::SerializeMap, Serializer}; use serde_json::Value; @@ -40,14 +39,6 @@ pub fn get_aws_partition_by_region(region: &str) -> String { } } -#[must_use] -pub fn base64_to_string(base64_string: &str) -> String { - let bytes = general_purpose::STANDARD - .decode(base64_string) - .unwrap_or_default(); - String::from_utf8_lossy(&bytes).to_string() -} - /// Serialize a `HashMap` with lowercase keys /// pub fn lowercase_key( diff --git a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs index cbf313ff6..eaa1ab907 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs @@ -6,8 +6,9 @@ use serde_json::Value; use tracing::debug; use crate::lifecycle::invocation::{ + base64_to_string, processor::MS_TO_NS, - triggers::{base64_to_string, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, + triggers::{Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, }; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -132,12 +133,13 @@ impl Trigger for SnsRecord { } fn get_carrier(&self) -> HashMap { - let carrier = HashMap::new(); if let Some(ma) = self.sns.message_attributes.get(DATADOG_CARRIER_KEY) { match ma.r#type.as_str() { "String" => return 
serde_json::from_str(&ma.value).unwrap_or_default(), "Binary" => { - return serde_json::from_str(&base64_to_string(&ma.value)).unwrap_or_default() + if let Ok(carrier) = base64_to_string(&ma.value) { + return serde_json::from_str(&carrier).unwrap_or_default(); + } } _ => { debug!("Unsupported type in SNS message attribute"); @@ -145,7 +147,7 @@ impl Trigger for SnsRecord { } } - carrier + HashMap::new() } fn is_async(&self) -> bool { diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs index b255ec491..589815a4a 100644 --- a/bottlecap/src/lifecycle/listener.rs +++ b/bottlecap/src/lifecycle/listener.rs @@ -144,15 +144,15 @@ impl Listener { let parsed_body = serde_json::from_slice::( &hyper::body::to_bytes(body).await.unwrap_or_default(), ); - let mut parsed_status: Option = None; - if let Some(status_code) = parsed_body.unwrap_or_default().get("statusCode") { - parsed_status = Some(status_code.to_string()); + let mut parsed_status_code: Option = None; + if let Some(sc) = parsed_body.unwrap_or_default().get("statusCode") { + parsed_status_code = Some(sc.to_string()); } let mut processor = invocation_processor.lock().await; let headers = Self::headers_to_map(parts.headers); - processor.on_invocation_end(headers, parsed_status); + processor.on_invocation_end(headers, parsed_status_code); drop(processor); Response::builder() From 90eff816b7042b73fb75b44d04ab0026aae11c98 Mon Sep 17 00:00:00 2001 From: alexgallotta <5581237+alexgallotta@users.noreply.github.com> Date: Tue, 12 Nov 2024 12:39:18 -0500 Subject: [PATCH 25/41] feat(bottlecap): add EventBridge-SQS inferred spans (#447) * add eb-sqs * fix import --- .../src/lifecycle/invocation/span_inferrer.rs | 17 +++++++++- .../invocation/triggers/dynamodb_event.rs | 2 +- .../invocation/triggers/event_bridge_event.rs | 2 +- .../invocation/triggers/sqs_event.rs | 33 +++++++++++++++++-- .../tests/payloads/eventbridge_sqs_event.json | 21 ++++++++++++ 5 files changed, 70 insertions(+), 5 
deletions(-) create mode 100644 bottlecap/tests/payloads/eventbridge_sqs_event.json diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index cdd14d32d..9dc7c4cc7 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -98,7 +98,22 @@ impl SpanInferrer { inferred_span.start - wrapped_inferred_span.start; self.wrapped_inferred_span = Some(wrapped_inferred_span); - } + } else if let Ok(event_bridge_entity) = + serde_json::from_str::(&t.body) + { + let mut wrapped_inferred_span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + + event_bridge_entity.enrich_span(&mut wrapped_inferred_span); + inferred_span.meta.extend(event_bridge_entity.get_tags()); + + wrapped_inferred_span.duration = + inferred_span.start - wrapped_inferred_span.start; + + self.wrapped_inferred_span = Some(wrapped_inferred_span); + }; trigger = Some(Box::new(t)); } diff --git a/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs b/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs index 46c20fe27..026e74832 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs @@ -142,7 +142,7 @@ mod tests { let expected = DynamoDbRecord { dynamodb: DynamoDbEntity { - approximate_creation_date_time: 1428537600.0, + approximate_creation_date_time: 1_428_537_600.0, size_bytes: 26, stream_view_type: String::from("NEW_AND_OLD_IMAGES"), }, diff --git a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs index f51b111c6..ff7d174c6 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs @@ -200,7 +200,7 @@ mod tests { assert_eq!(span.resource, "testBus"); // Seconds resolution - 
assert_eq!(span.start, 1731140535000000000); + assert_eq!(span.start, 1_731_140_535_000_000_000); } #[test] diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs index 4f804c3de..6e748d4d9 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -7,6 +7,7 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::MS_TO_NS, triggers::{ + event_bridge_event::EventBridgeEvent, get_aws_partition_by_region, sns_event::{SnsEntity, SnsRecord}, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, @@ -180,8 +181,6 @@ impl Trigger for SqsRecord { } } - // TODO: Check for EventBridge event sent through SQS - // Check for SNS event sent through SQS if let Ok(sns_entity) = serde_json::from_str::(&self.body) { let sns_record = SnsRecord { @@ -190,6 +189,8 @@ impl Trigger for SqsRecord { }; return sns_record.get_carrier(); + } else if let Ok(event) = serde_json::from_str::(&self.body) { + return event.get_carrier(); } // TODO: AWSTraceHeader @@ -362,4 +363,32 @@ mod tests { assert_eq!(carrier, expected); } + + #[test] + fn test_get_carrier_from_eventbridge() { + let json = read_json_file("eventbridge_sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize EventBridgeEvent"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "7379586022458917877".to_string(), + ), + ( + "traceparent".to_string(), + "00-000000000000000066698e63821a03f5-24b17e9b6476c018-01".to_string(), + ), + ("x-datadog-tags".to_string(), "_dd.p.dm=-0".to_string()), + ( + "x-datadog-parent-id".to_string(), + "2644033662113726488".to_string(), + ), + ("tracestate".to_string(), "dd=t.dm:-0;s:1".to_string()), + ("x-datadog-sampling-priority".to_string(), 
"1".to_string()), + ]); + + assert_eq!(carrier, expected); + } } diff --git a/bottlecap/tests/payloads/eventbridge_sqs_event.json b/bottlecap/tests/payloads/eventbridge_sqs_event.json new file mode 100644 index 000000000..033740244 --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_sqs_event.json @@ -0,0 +1,21 @@ +{ + "Records": [ + { + "messageId": "e995e54f-1724-41fa-82c0-8b81821f854e", + "receiptHandle": "AQEB4mIfRcyqtzn1X5Ss+ConhTejVGc+qnAcmu3/Z9ZvbNkaPcpuDLX/bzvPD/ZkAXJUXZcemGSJmd7L3snZHKMP2Ck8runZiyl4mubiLb444pZvdiNPuGRJ6a3FvgS/GQPzho/9nNMyOi66m8Viwh70v4EUCPGO4JmD3TTDAUrrcAnqU4WSObjfC/NAp9bI6wH2CEyAYEfex6Nxplbl/jBf9ZUG0I3m3vQd0Q4l4gd4jIR4oxQUglU2Tldl4Kx5fMUAhTRLAENri6HsY81avBkKd9FAuxONlsITB5uj02kOkvLlRGEcalqsKyPJ7AFaDLrOLaL3U+yReroPEJ5R5nwhLOEbeN5HROlZRXeaAwZOIN8BjqdeooYTIOrtvMEVb7a6OPLMdH1XB+ddevtKAH8K9Tm2ZjpaA7dtBGh1zFVHzBk=", + "body": "{\"version\":\"0\",\"id\":\"af718b2a-b987-e8c0-7a2b-a188fad2661a\",\"detail-type\":\"my.Detail\",\"source\":\"my.Source\",\"account\":\"425362996713\",\"time\":\"2023-08-03T22:49:03Z\",\"region\":\"us-east-1\",\"resources\":[],\"detail\":{\"text\":\"Hello, world!\",\"_datadog\":{\"x-datadog-trace-id\":\"7379586022458917877\",\"x-datadog-parent-id\":\"2644033662113726488\",\"x-datadog-sampling-priority\":\"1\",\"x-datadog-tags\":\"_dd.p.dm=-0\",\"traceparent\":\"00-000000000000000066698e63821a03f5-24b17e9b6476c018-01\",\"tracestate\":\"dd=t.dm:-0;s:1\"}}}", + "attributes": { + "ApproximateReceiveCount": "1", + "AWSTraceHeader": "Root=1-64cc2edd-112fbf1701d1355973a11d57;Parent=7d5a9776024b2d42;Sampled=0", + "SentTimestamp": "1691102943638", + "SenderId": "AIDAJXNJGGKNS7OSV23OI", + "ApproximateFirstReceiveTimestamp": "1691102943647" + }, + "messageAttributes": {}, + "md5OfBody": "93d9f0cd8886d1e000a1a0b7007bffc4", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:us-east-1:425362996713:lambda-eb-sqs-lambda-dev-demo-queue", + "awsRegion": "us-east-1" + } + ] +} From 4d2a28a4cfe1f6fcd37b7b8f1bceda2cd1dc1956 Mon 
Sep 17 00:00:00 2001 From: alexgallotta <5581237+alexgallotta@users.noreply.github.com> Date: Tue, 12 Nov 2024 13:06:46 -0500 Subject: [PATCH 26/41] feat(bottlecap): add Kinesis inferred spans (#445) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add kinesis * Update bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs Co-authored-by: jordan gonzález <30836115+duncanista@users.noreply.github.com> * group up import and address comments * fix timestamp, it's in seconds * fix clippy * deserialized carrier * remove manual deref and resourcename meta tag since it is not used --------- Co-authored-by: jordan gonzález <30836115+duncanista@users.noreply.github.com> --- .../src/lifecycle/invocation/span_inferrer.rs | 7 + .../invocation/triggers/kinesis_event.rs | 248 ++++++++++++++++++ .../src/lifecycle/invocation/triggers/mod.rs | 1 + bottlecap/tests/payloads/kinesis_event.json | 20 ++ 4 files changed, 276 insertions(+) create mode 100644 bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs create mode 100644 bottlecap/tests/payloads/kinesis_event.json diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 9dc7c4cc7..3c1401579 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -12,6 +12,7 @@ use crate::lifecycle::invocation::triggers::{ api_gateway_rest_event::APIGatewayRestEvent, dynamodb_event::DynamoDbRecord, event_bridge_event::EventBridgeEvent, + kinesis_event::KinesisRecord, sns_event::{SnsEntity, SnsRecord}, sqs_event::SqsRecord, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, @@ -139,6 +140,12 @@ impl SpanInferrer { if let Some(t) = EventBridgeEvent::new(payload_value.clone()) { t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); + } + } else if KinesisRecord::is_match(payload_value) { + if let Some(t) = KinesisRecord::new(payload_value.clone()) { + 
t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); } } else { diff --git a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs new file mode 100644 index 000000000..c735d5439 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs @@ -0,0 +1,248 @@ +#![allow(clippy::module_name_repetitions)] +use base64::engine::general_purpose; +use base64::Engine; +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::{from_slice, Value}; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::S_TO_NS, + triggers::{Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct KinesisEvent { + #[serde(rename = "Records")] + pub records: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct KinesisRecord { + #[serde(rename = "eventID")] + pub event_id: String, + #[serde(rename = "eventName")] + pub event_name: String, + #[serde(rename = "eventSourceARN")] + pub event_source_arn: String, + #[serde(rename = "eventVersion")] + pub event_version: String, + pub kinesis: KinesisEntity, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct KinesisEntity { + #[serde(rename = "approximateArrivalTimestamp")] + pub approximate_arrival_timestamp: f64, + #[serde(rename = "partitionKey")] + pub partition_key: String, + pub data: String, +} + +impl Trigger for KinesisRecord { + fn new(payload: Value) -> Option { + let records = payload.get("Records").and_then(Value::as_array); + match records { + Some(records) => match serde_json::from_value::(records[0].clone()) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize Kinesis Record: {e}"); + None + } + }, + None => None, + } + } + + fn is_match(payload: &Value) -> bool { + if let Some(first_record) 
= payload + .get("Records") + .and_then(Value::as_array) + .and_then(|r| r.first()) + .take() + { + first_record.get("kinesis").is_some() + } else { + false + } + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span) { + let event_source_arn = &self.event_source_arn; + let parsed_stream_name = event_source_arn.split('/').last().unwrap_or_default(); + let parsed_shard_id = self.event_id.split(':').next().unwrap_or_default(); + span.name = "aws.kinesis".to_string(); + span.service = "kinesis".to_string(); + span.start = (self.kinesis.approximate_arrival_timestamp * S_TO_NS) as i64; + span.resource = parsed_stream_name.to_string(); + span.r#type = "web".to_string(); + span.meta = HashMap::from([ + ("operation_name".to_string(), "aws.kinesis".to_string()), + ("stream_name".to_string(), parsed_stream_name.to_string()), + ("shard_id".to_string(), parsed_shard_id.to_string()), + ("event_source_arn".to_string(), event_source_arn.to_string()), + ("event_id".to_string(), self.event_id.to_string()), + ("event_name".to_string(), self.event_name.to_string()), + ("event_version".to_string(), self.event_version.to_string()), + ( + "partition_key".to_string(), + self.kinesis.partition_key.to_string(), + ), + ]); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "kinesis".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + self.event_source_arn.clone() + } + + fn get_carrier(&self) -> HashMap { + if let Ok(decoded_base64) = general_purpose::STANDARD.decode(&self.kinesis.data) { + if let Ok(as_json_map) = from_slice::>(&decoded_base64) { + if let Some(carrier) = as_json_map.get(DATADOG_CARRIER_KEY) { + return serde_json::from_value(carrier.clone()).unwrap_or_default(); + } + } + }; + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + 
+ #[test] + fn test_new() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = KinesisRecord::new(payload).expect("Failed to deserialize into Record"); + + let expected = KinesisRecord { + event_id: + "shardId-000000000002:49624230154685806402418173680709770494154422022871973922" + .to_string(), + event_name: "aws:kinesis:record".to_string(), + event_source_arn: "arn:aws:kinesis:sa-east-1:425362996713:stream/kinesisStream" + .to_string(), + event_version: "1.0".to_string(), + kinesis: KinesisEntity { + approximate_arrival_timestamp: 1_643_638_425.163, + partition_key: "partitionkey".to_string(), + data: "eyJmb28iOiAiYmFyIiwgIl9kYXRhZG9nIjogeyJ4LWRhdGFkb2ctdHJhY2UtaWQiOiAiNDk0ODM3NzMxNjM1NzI5MTQyMSIsICJ4LWRhdGFkb2ctcGFyZW50LWlkIjogIjI4NzYyNTMzODAwMTg2ODEwMjYiLCAieC1kYXRhZG9nLXNhbXBsaW5nLXByaW9yaXR5IjogIjEifX0=".to_string(), + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize S3Record"); + + assert!(KinesisRecord::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!KinesisRecord::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize S3Record"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.kinesis"); + assert_eq!(span.service, "kinesis"); + assert_eq!(span.resource, "kinesisStream"); + assert_eq!(span.r#type, "web"); + + assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), 
"aws.kinesis".to_string()), + ("stream_name".to_string(), "kinesisStream".to_string()), + ("shard_id".to_string(), "shardId-000000000002".to_string()), + ( + "event_source_arn".to_string(), + "arn:aws:kinesis:sa-east-1:425362996713:stream/kinesisStream".to_string() + ), + ( + "event_id".to_string(), + "shardId-000000000002:49624230154685806402418173680709770494154422022871973922" + .to_string() + ), + ("event_name".to_string(), "aws:kinesis:record".to_string()), + ("event_version".to_string(), "1.0".to_string()), + ("partition_key".to_string(), "partitionkey".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize KinesisRecord"); + let tags = event.get_tags(); + + let expected = HashMap::from([( + "function_trigger.event_source".to_string(), + "kinesis".to_string(), + )]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize KinesisRecord"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:kinesis:sa-east-1:425362996713:stream/kinesisStream".to_string() + ); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize KinesisRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "4948377316357291421".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "2876253380018681026".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + 
]); + + assert_eq!(carrier, expected); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index dcc3f9b62..a5d0f4ae2 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -8,6 +8,7 @@ pub mod api_gateway_http_event; pub mod api_gateway_rest_event; pub mod dynamodb_event; pub mod event_bridge_event; +pub mod kinesis_event; pub mod s3_event; pub mod sns_event; pub mod sqs_event; diff --git a/bottlecap/tests/payloads/kinesis_event.json b/bottlecap/tests/payloads/kinesis_event.json new file mode 100644 index 000000000..822530822 --- /dev/null +++ b/bottlecap/tests/payloads/kinesis_event.json @@ -0,0 +1,20 @@ +{ + "Records": [ + { + "kinesis": { + "kinesisSchemaVersion": "1.0", + "partitionKey": "partitionkey", + "sequenceNumber": "49624230154685806402418173680709770494154422022871973922", + "data": "eyJmb28iOiAiYmFyIiwgIl9kYXRhZG9nIjogeyJ4LWRhdGFkb2ctdHJhY2UtaWQiOiAiNDk0ODM3NzMxNjM1NzI5MTQyMSIsICJ4LWRhdGFkb2ctcGFyZW50LWlkIjogIjI4NzYyNTMzODAwMTg2ODEwMjYiLCAieC1kYXRhZG9nLXNhbXBsaW5nLXByaW9yaXR5IjogIjEifX0=", + "approximateArrivalTimestamp": 1643638425.163 + }, + "eventSource": "aws:kinesis", + "eventVersion": "1.0", + "eventID": "shardId-000000000002:49624230154685806402418173680709770494154422022871973922", + "eventName": "aws:kinesis:record", + "invokeIdentityArn": "arn:aws:iam::425362996713:role/inferred-spans-python-dev-sa-east-1-lambdaRole", + "awsRegion": "sa-east-1", + "eventSourceARN": "arn:aws:kinesis:sa-east-1:425362996713:stream/kinesisStream" + } + ] +} From 40476a65eec48ff881795746e8ff5934ade82be5 Mon Sep 17 00:00:00 2001 From: shreyamalpani Date: Tue, 12 Nov 2024 13:16:40 -0500 Subject: [PATCH 27/41] feat(bottlecap): generate tmp enhanced metrics (#443) * generate tmp enhanced metrics * fix channel stop signal * use tokio async task instead of thread * statfs fix * fixes * remove unused import * rename tmp_chan to 
tmp_chan_tx --- bottlecap/Cargo.toml | 2 +- bottlecap/src/bin/bottlecap/main.rs | 6 +- bottlecap/src/lifecycle/invocation/context.rs | 3 + .../src/lifecycle/invocation/processor.rs | 11 +- bottlecap/src/metrics/enhanced/constants.rs | 6 + bottlecap/src/metrics/enhanced/lambda.rs | 125 +++++++++++++++++- bottlecap/src/metrics/enhanced/mod.rs | 1 + bottlecap/src/metrics/enhanced/statfs.rs | 25 ++++ 8 files changed, 172 insertions(+), 7 deletions(-) create mode 100644 bottlecap/src/metrics/enhanced/statfs.rs diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index efe1cbb11..7fcba69a5 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -20,7 +20,7 @@ figment = { version = "0.10", default-features = false, features = ["yaml", "env hyper = { version = "0.14", default-features = false, features = ["server"] } lazy_static = { version = "1.5", default-features = false } log = { version = "0.4", default-features = false } -nix = { version = "0.26", default-features = false, features = ["feature"] } +nix = { version = "0.26", default-features = false, features = ["feature", "fs"] } protobuf = { version = "3.5", default-features = false } regex = { version = "1.10", default-features = false } reqwest = { version = "0.12", features = ["json", "http2", "rustls-tls"], default-features = false } diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 6856bd8a3..a1c78ec50 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -53,11 +53,9 @@ use dogstatsd::{ use reqwest::Client; use serde::Deserialize; use std::{ - collections::hash_map, - collections::HashMap, + collections::{hash_map, HashMap}, env, - io::Error, - io::Result, + io::{Error, Result}, os::unix::process::CommandExt, path::Path, process::Command, diff --git a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs index d90a798b9..4cffb929c 100644 --- 
a/bottlecap/src/lifecycle/invocation/context.rs +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -172,6 +172,7 @@ impl ContextBuffer { mod tests { use crate::proc::{CPUData, NetworkData}; use std::collections::HashMap; + use tokio::sync::watch; use super::*; @@ -318,11 +319,13 @@ mod tests { }); let uptime_offset = Some(50.0); + let (tmp_chan_tx, _) = watch::channel(()); let enhanced_metric_data = Some(EnhancedMetricData { network_offset, cpu_offset, uptime_offset, + tmp_chan_tx, }); buffer.add_enhanced_metric_data(&request_id, enhanced_metric_data.clone()); diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index b9e82db44..d6e44a98e 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -9,7 +9,7 @@ use datadog_trace_protobuf::pb::Span; use datadog_trace_utils::{send_data::SendData, tracer_header_tags}; use dogstatsd::aggregator::Aggregator as MetricsAggregator; use serde_json::{json, Value}; -use tokio::sync::mpsc::Sender; +use tokio::sync::{mpsc::Sender, watch}; use tracing::debug; use crate::{ @@ -97,13 +97,20 @@ impl Processor { pub fn on_invoke_event(&mut self, request_id: String) { self.context_buffer.create_context(request_id.clone()); if self.enhanced_metrics_enabled { + // Collect offsets for network and cpu metrics let network_offset: Option = proc::get_network_data().ok(); let cpu_offset: Option = proc::get_cpu_data().ok(); let uptime_offset: Option = proc::get_uptime().ok(); + + // Start a channel for monitoring tmp enhanced data + let (tmp_chan_tx, tmp_chan_rx) = watch::channel(()); + self.enhanced_metrics.set_tmp_enhanced_metrics(tmp_chan_rx); + let enhanced_metric_offsets = Some(EnhancedMetricData { network_offset, cpu_offset, uptime_offset, + tmp_chan_tx, }); self.context_buffer .add_enhanced_metric_data(&request_id, enhanced_metric_offsets); @@ -181,6 +188,8 @@ impl Processor { offsets.cpu_offset.clone(), 
offsets.uptime_offset, ); + // Send the signal to stop monitoring tmp + _ = offsets.tmp_chan_tx.send(()); } } diff --git a/bottlecap/src/metrics/enhanced/constants.rs b/bottlecap/src/metrics/enhanced/constants.rs index 2d17e73ec..f62fd67ea 100644 --- a/bottlecap/src/metrics/enhanced/constants.rs +++ b/bottlecap/src/metrics/enhanced/constants.rs @@ -5,6 +5,9 @@ pub const ARM_LAMBDA_PRICE_PER_GB_SECOND: f64 = 0.000_013_333_4; pub const MS_TO_SEC: f64 = 0.001; pub const MB_TO_GB: f64 = 1_024.0; +// tmp directory path +pub const TMP_PATH: &str = "/tmp/"; + // Enhanced metrics pub const MAX_MEMORY_USED_METRIC: &str = "aws.lambda.enhanced.max_memory_used"; pub const MEMORY_SIZE_METRIC: &str = "aws.lambda.enhanced.memorysize"; @@ -32,5 +35,8 @@ pub const CPU_TOTAL_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_total_ut pub const NUM_CORES_METRIC: &str = "aws.lambda.enhanced.num_cores"; pub const CPU_MAX_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_max_utilization"; pub const CPU_MIN_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_min_utilization"; +pub const TMP_MAX_METRIC: &str = "aws.lambda.enhanced.tmp_max"; +pub const TMP_USED_METRIC: &str = "aws.lambda.enhanced.tmp_used"; +pub const TMP_FREE_METRIC: &str = "aws.lambda.enhanced.tmp_free"; //pub const ASM_INVOCATIONS_METRIC: &str = "aws.lambda.enhanced.asm.invocations"; pub const ENHANCED_METRICS_ENV_VAR: &str = "DD_ENHANCED_METRICS"; diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index b11427e96..6e4f53c72 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -1,4 +1,5 @@ use super::constants::{self, BASE_LAMBDA_INVOCATION_PRICE}; +use super::statfs::statfs_info; use crate::proc::{self, CPUData, NetworkData}; use crate::telemetry::events::ReportMetrics; use dogstatsd::aggregator::Aggregator; @@ -6,6 +7,11 @@ use dogstatsd::metric; use dogstatsd::metric::{Metric, MetricValue}; use std::env::consts::ARCH; 
use std::sync::{Arc, Mutex}; +use std::time::Duration; +use tokio::{ + sync::watch::{Receiver, Sender}, + time::interval, +}; use tracing::debug; use tracing::error; @@ -343,6 +349,86 @@ impl Lambda { } } + pub fn generate_tmp_enhanced_metrics( + tmp_max: f64, + tmp_used: f64, + aggr: &mut std::sync::MutexGuard, + ) { + let metric = Metric::new( + constants::TMP_MAX_METRIC.into(), + MetricValue::distribution(tmp_max), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert tmp_max metric: {}", e); + } + + let metric = Metric::new( + constants::TMP_USED_METRIC.into(), + MetricValue::distribution(tmp_used), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert tmp_used metric: {}", e); + } + + let tmp_free = tmp_max - tmp_used; + let metric = Metric::new( + constants::TMP_FREE_METRIC.into(), + MetricValue::distribution(tmp_free), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert tmp_free metric: {}", e); + } + } + + pub fn set_tmp_enhanced_metrics(&self, mut send_metrics: Receiver<()>) { + if !self.config.enhanced_metrics { + return; + } + + let aggr = Arc::clone(&self.aggregator); + + tokio::spawn(async move { + // Set tmp_max and initial value for tmp_used + let (bsize, blocks, bavail) = match statfs_info(constants::TMP_PATH) { + Ok(stats) => stats, + Err(err) => { + debug!("Could not emit tmp enhanced metrics. {:?}", err); + return; + } + }; + let tmp_max = bsize * blocks; + let mut tmp_used = bsize * (blocks - bavail); + + let mut interval = interval(Duration::from_millis(10)); + loop { + tokio::select! 
{ + biased; + // When the stop signal is received, generate final metrics + _ = send_metrics.changed() => { + let mut aggr: std::sync::MutexGuard = + aggr.lock().expect("lock poisoned"); + Self::generate_tmp_enhanced_metrics(tmp_max, tmp_used, &mut aggr); + return; + } + // Otherwise keep monitoring tmp usage periodically + _ = interval.tick() => { + let (bsize, blocks, bavail) = match statfs_info(constants::TMP_PATH) { + Ok(stats) => stats, + Err(err) => { + debug!("Could not emit tmp enhanced metrics. {:?}", err); + return; + } + }; + tmp_used = tmp_used.max(bsize * (blocks - bavail)); + } + } + } + }); + } + fn calculate_estimated_cost_usd(billed_duration_ms: u64, memory_size_mb: u64) -> f64 { let gb_seconds = (billed_duration_ms as f64 * constants::MS_TO_SEC) * (memory_size_mb as f64 / constants::MB_TO_GB); @@ -411,11 +497,20 @@ impl Lambda { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug)] pub struct EnhancedMetricData { pub network_offset: Option, pub cpu_offset: Option, pub uptime_offset: Option, + pub tmp_chan_tx: Sender<()>, +} + +impl PartialEq for EnhancedMetricData { + fn eq(&self, other: &Self) -> bool { + self.network_offset == other.network_offset + && self.cpu_offset == other.cpu_offset + && self.uptime_offset == other.uptime_offset + } } #[cfg(test)] @@ -565,6 +660,15 @@ mod tests { assert!(aggr .get_entry_by_id(constants::CPU_MAX_UTILIZATION_METRIC.into(), &None) .is_none()); + assert!(aggr + .get_entry_by_id(constants::TMP_MAX_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::TMP_USED_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::TMP_FREE_METRIC.into(), &None) + .is_none()); } #[test] @@ -695,4 +799,23 @@ mod tests { assert_sketch(&metrics_aggr, constants::CPU_MAX_UTILIZATION_METRIC, 30.0); assert_sketch(&metrics_aggr, constants::CPU_MIN_UTILIZATION_METRIC, 28.75); } + + #[test] + fn test_set_tmp_enhanced_metrics() { + let (metrics_aggr, my_config) = setup(); + 
let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let tmp_max = 550461440.0; + let tmp_used = 12165120.0; + + Lambda::generate_tmp_enhanced_metrics( + tmp_max, + tmp_used, + &mut lambda.aggregator.lock().expect("lock poisoned"), + ); + + assert_sketch(&metrics_aggr, constants::TMP_MAX_METRIC, 550461440.0); + assert_sketch(&metrics_aggr, constants::TMP_USED_METRIC, 12165120.0); + assert_sketch(&metrics_aggr, constants::TMP_FREE_METRIC, 538296320.0); + } } diff --git a/bottlecap/src/metrics/enhanced/mod.rs b/bottlecap/src/metrics/enhanced/mod.rs index a2638024e..bca7c1bf0 100644 --- a/bottlecap/src/metrics/enhanced/mod.rs +++ b/bottlecap/src/metrics/enhanced/mod.rs @@ -1,2 +1,3 @@ pub mod constants; pub mod lambda; +pub mod statfs; diff --git a/bottlecap/src/metrics/enhanced/statfs.rs b/bottlecap/src/metrics/enhanced/statfs.rs new file mode 100644 index 000000000..0da8a1828 --- /dev/null +++ b/bottlecap/src/metrics/enhanced/statfs.rs @@ -0,0 +1,25 @@ +#![allow(clippy::module_name_repetitions)] + +use nix::sys::statfs::statfs; +use std::io; +use std::path::Path; + +#[cfg(not(target_os = "windows"))] +/// Returns the block size, total number of blocks, and number of blocks available for the specified directory path. 
+/// +pub fn statfs_info(path: &str) -> Result<(f64, f64, f64), io::Error> { + let stat = statfs(Path::new(path)).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + Ok(( + stat.block_size() as f64, + stat.blocks() as f64, + stat.blocks_available() as f64, + )) +} + +#[cfg(target_os = "windows")] +fn statfs_info(path: &str) -> Result<(f64, f64, f64), io::Error> { + Err(io::Error::new( + io::ErrorKind::Other, + "Cannot get tmp data on Windows", + )) +} From 5908684ed9f0aa340874c6d04a93bab89993b516 Mon Sep 17 00:00:00 2001 From: alexgallotta <5581237+alexgallotta@users.noreply.github.com> Date: Tue, 12 Nov 2024 13:20:39 -0500 Subject: [PATCH 28/41] feat(bottlecap): add EventBridge-SNS inferred spans (#448) * add eb sns * fix import --- .../src/lifecycle/invocation/span_inferrer.rs | 21 +++++++++ .../invocation/triggers/sns_event.rs | 43 ++++++++++++++++++- .../tests/payloads/eventbridge_sns_event.json | 17 ++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 bottlecap/tests/payloads/eventbridge_sns_event.json diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 3c1401579..39314dd83 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -122,6 +122,27 @@ impl SpanInferrer { if let Some(t) = SnsRecord::new(payload_value.clone()) { t.enrich_span(&mut inferred_span); + if let Some(message) = &t.sns.message { + if let Ok(event_bridge_wrapper_message) = + serde_json::from_str::(message) + { + let mut wrapped_inferred_span = Span { + span_id: Self::generate_span_id(), + ..Default::default() + }; + + event_bridge_wrapper_message.enrich_span(&mut wrapped_inferred_span); + inferred_span + .meta + .extend(event_bridge_wrapper_message.get_tags()); + + wrapped_inferred_span.duration = + inferred_span.start - wrapped_inferred_span.start; + + self.wrapped_inferred_span = Some(wrapped_inferred_span); + } + } + 
trigger = Some(Box::new(t)); } } else if DynamoDbRecord::is_match(payload_value) { diff --git a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs index eaa1ab907..2b7514cf1 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs @@ -8,7 +8,10 @@ use tracing::debug; use crate::lifecycle::invocation::{ base64_to_string, processor::MS_TO_NS, - triggers::{Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, + triggers::{ + event_bridge_event::EventBridgeEvent, Trigger, DATADOG_CARRIER_KEY, + FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, }; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -39,6 +42,8 @@ pub struct SnsEntity { pub timestamp: DateTime, #[serde(rename = "Subject")] pub subject: Option, + #[serde(rename = "Message")] + pub message: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -50,7 +55,7 @@ pub struct MessageAttribute { } impl Trigger for SnsRecord { - fn new(payload: serde_json::Value) -> Option { + fn new(payload: Value) -> Option { match payload.get("Records").and_then(Value::as_array) { Some(records) => match serde_json::from_value::(records[0].clone()) { Ok(record) => Some(record), @@ -145,6 +150,10 @@ impl Trigger for SnsRecord { debug!("Unsupported type in SNS message attribute"); } } + } else if let Some(event_bridge_message) = &self.sns.message { + if let Ok(event) = serde_json::from_str::(event_bridge_message) { + return event.get_carrier(); + } } HashMap::new() @@ -187,6 +196,7 @@ mod tests { .unwrap() .with_timezone(&Utc), subject: None, + message: Some("Asynchronously invoking a Lambda function with SNS.".to_string()), }, }; @@ -302,4 +312,33 @@ mod tests { assert_eq!(carrier, expected); } + + #[test] + fn test_get_carrier_from_event_bridge() { + let json = read_json_file("eventbridge_sns_event.json"); + let payload = 
serde_json::from_str(&json).expect("Failed to deserialize into Value"); + println!("{payload:?}"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-resource-name".to_string(), + "test-bus".to_string(), + ), + ("x-datadog-trace-id".to_string(), "12345".to_string()), + ( + "x-datadog-start-time".to_string(), + "1726515840997".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-parent-id".to_string(), "67890".to_string()), + ( + "x-datadog-tags".to_string(), + "_dd.p.dm=-1,_dd.p.tid=123567890".to_string(), + ), + ]); + + assert_eq!(carrier, expected); + } } diff --git a/bottlecap/tests/payloads/eventbridge_sns_event.json b/bottlecap/tests/payloads/eventbridge_sns_event.json new file mode 100644 index 000000000..176c86021 --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_sns_event.json @@ -0,0 +1,17 @@ +{ + "Records":[ + { + "Sns":{ + "MessageId":"12345678-90abc-def-1234-567890abcdef", + "Type":"Notification", + "TopicArn":"arn:aws:sns:us-east-1:123456789012:test-notifier", + "MessageAttributes":{ + + }, + "Timestamp":"2024-09-16T19:44:01.713Z", + "Subject":"", + "Message":"{\"version\":\"0\",\"id\":\"12345678-90abc-def-1234-567890abcdef\",\"detail-type\":\"TestDetail\",\"source\":\"com.test.source\",\"account\":\"12345667890\",\"time\":\"2024-09-16T19:44:01Z\",\"region\":\"us-east-1\",\"resources\":[],\"detail\":{\"foo\":\"bar\",\"_datadog\":{\"x-datadog-trace-id\":\"12345\",\"x-datadog-parent-id\":\"67890\",\"x-datadog-sampling-priority\":\"1\",\"x-datadog-start-time\":\"1726515840997\",\"x-datadog-resource-name\":\"test-bus\",\"x-datadog-tags\":\"_dd.p.dm=-1,_dd.p.tid=123567890\"}}}" + } + } + ] +} From cb9e745be3a1f07a405468b7a703c3d898991fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:37:09 -0500 
Subject: [PATCH 29/41] feat(bottlecap): add Step Functions trace extraction (#449) * add step functions events payloads * make some methods public * add `StepFunctionEvent` * adapt `SpanInferrer` for generated `SpanContext` * adapt `InvocationProcessor` for generated `SpanContext` * resolve merge conflicts * resolve clippy issues * add allow clippy * do not serialize the `entered_time` * set `None` for inferred span when `generated_span_context` exists * tidy code for last trace context update * fix unit test --- .../src/lifecycle/invocation/processor.rs | 65 +++- .../src/lifecycle/invocation/span_inferrer.rs | 48 ++- .../src/lifecycle/invocation/triggers/mod.rs | 1 + .../triggers/step_function_event.rs | 366 ++++++++++++++++++ bottlecap/src/metrics/enhanced/statfs.rs | 1 + .../traces/propagation/text_map_propagator.rs | 5 +- .../tests/payloads/step_function_event.json | 19 + .../payloads/step_function_legacy_event.json | 21 + 8 files changed, 496 insertions(+), 30 deletions(-) create mode 100644 bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs create mode 100644 bottlecap/tests/payloads/step_function_event.json create mode 100644 bottlecap/tests/payloads/step_function_legacy_event.json diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index d6e44a98e..64a8582b1 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -23,7 +23,13 @@ use crate::{ telemetry::events::{ReportMetrics, Status}, traces::{ context::SpanContext, - propagation::{DatadogCompositePropagator, Propagator}, + propagation::{ + text_map_propagator::{ + DatadogHeaderPropagator, DATADOG_PARENT_ID_KEY, DATADOG_SPAN_ID_KEY, + DATADOG_TRACE_ID_KEY, + }, + DatadogCompositePropagator, Propagator, + }, trace_processor, }, }; @@ -278,6 +284,8 @@ impl Processor { self.span.trace_id = 0; self.span.parent_id = 0; self.span.span_id = 0; + self.span.error = 0; + 
self.span.meta.clear(); self.extracted_span_context = None; let payload_value = match serde_json::from_slice::(&payload) { @@ -288,6 +296,7 @@ impl Processor { self.inferrer.infer_span(&payload_value, &self.aws_config); self.extracted_span_context = self.extract_span_context(&headers, &payload_value); + // Set the extracted trace context to the spans if let Some(sc) = &self.extracted_span_context { self.span.trace_id = sc.trace_id; self.span.parent_id = sc.span_id; @@ -302,6 +311,8 @@ impl Processor { } } + // If we have an inferred span, set the invocation span parent id + // to be the inferred span id, even if we don't have an extracted trace context if let Some(inferred_span) = &self.inferrer.inferred_span { self.span.parent_id = inferred_span.span_id; } @@ -312,11 +323,8 @@ impl Processor { headers: &HashMap, payload_value: &Value, ) -> Option { - if let Some(carrier) = self.inferrer.get_carrier() { - if let Some(sc) = self.propagator.extract(&carrier) { - debug!("Extracted trace context from inferred span"); - return Some(sc); - } + if let Some(sc) = self.inferrer.get_span_context(&self.propagator) { + return Some(sc); } if let Some(payload_headers) = payload_value.get("headers") { @@ -363,36 +371,51 @@ impl Processor { } fn update_span_context_from_headers(&mut self, headers: &HashMap) { - // todo: fix this, code is a copy of the existing logic in Go, not accounting - // when a 128 bit trace id exist let mut trace_id = 0; - let mut span_id = 0; let mut parent_id = 0; + let mut tags: HashMap = HashMap::new(); - // If we have a trace context, update the span context + // If we have a trace context, this means we got it from + // distributed tracing if let Some(sc) = &mut self.extracted_span_context { + debug!("Trace context was found, not extracting it from incoming headers"); trace_id = sc.trace_id; - span_id = sc.span_id; + parent_id = sc.span_id; + tags.extend(sc.tags.clone()); } - if let Some(header) = headers.get("x-datadog-trace-id") { - trace_id = 
header.parse::().unwrap_or(0); - } + // We are the root span, so we should extract the trace context + // from the tracer, which has sent it through end invocation headers + if trace_id == 0 { + debug!("No trace context found, extracting it from headers"); + // Extract trace context from headers manually + if let Some(header) = headers.get(DATADOG_TRACE_ID_KEY) { + trace_id = header.parse::().unwrap_or(0); + } + + if let Some(header) = headers.get(DATADOG_PARENT_ID_KEY) { + parent_id = header.parse::().unwrap_or(0); + } + + // TODO: sampling priority extraction - if let Some(header) = headers.get("x-datadog-span-id") { - span_id = header.parse::().unwrap_or(0); + // Extract tags from headers + // Used for 128 bit trace ids + tags = DatadogHeaderPropagator::extract_tags(headers); } - if let Some(header) = headers.get("x-datadog-parent-id") { - parent_id = header.parse::().unwrap_or(0); + // We should always use the generated trace id from the tracer + if let Some(header) = headers.get(DATADOG_SPAN_ID_KEY) { + self.span.span_id = header.parse::().unwrap_or(0); } self.span.trace_id = trace_id; - self.span.span_id = span_id; - // If no inferred span, set the parent id right away - if self.inferrer.inferred_span.is_none() { + if self.inferrer.inferred_span.is_some() { + self.inferrer.extend_meta(tags); + } else { self.span.parent_id = parent_id; + self.span.meta.extend(tags); } } diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 39314dd83..a4a3a5600 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -13,19 +13,27 @@ use crate::lifecycle::invocation::triggers::{ dynamodb_event::DynamoDbRecord, event_bridge_event::EventBridgeEvent, kinesis_event::KinesisRecord, + s3_event::S3Record, sns_event::{SnsEntity, SnsRecord}, sqs_event::SqsRecord, + step_function_event::StepFunctionEvent, Trigger, 
FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, }; use crate::tags::lambda::tags::{INIT_TYPE, SNAP_START_VALUE}; - -use super::triggers::s3_event::S3Record; +use crate::traces::{context::SpanContext, propagation::Propagator}; pub struct SpanInferrer { + // Span inferred from the Lambda incoming request payload pub inferred_span: Option, + // Nested span inferred from the Lambda incoming request payload pub wrapped_inferred_span: Option, + // If the inferred span is async is_async_span: bool, + // Carrier to extract the span context from carrier: Option>, + // Generated Span Context from Step Functions + generated_span_context: Option, + // Tags generated from the trigger trigger_tags: Option>, } @@ -43,6 +51,7 @@ impl SpanInferrer { wrapped_inferred_span: None, is_async_span: false, carrier: None, + generated_span_context: None, trigger_tags: None, } } @@ -51,11 +60,13 @@ impl SpanInferrer { /// and try matching it to a `Trigger` implementation, which will create /// an inferred span and set it to `self.inferred_span` /// + #[allow(clippy::too_many_lines)] pub fn infer_span(&mut self, payload_value: &Value, aws_config: &AwsConfig) { self.inferred_span = None; self.wrapped_inferred_span = None; self.is_async_span = false; self.carrier = None; + self.generated_span_context = None; self.trigger_tags = None; let mut trigger: Option> = None; @@ -167,6 +178,11 @@ impl SpanInferrer { if let Some(t) = KinesisRecord::new(payload_value.clone()) { t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); + } + } else if StepFunctionEvent::is_match(payload_value) { + if let Some(t) = StepFunctionEvent::new(payload_value.clone()) { + self.generated_span_context = Some(t.get_span_context()); trigger = Some(Box::new(t)); } } else { @@ -184,7 +200,13 @@ impl SpanInferrer { self.trigger_tags = Some(trigger_tags); self.carrier = Some(t.get_carrier()); self.is_async_span = t.is_async(); - self.inferred_span = Some(inferred_span); + + // For Step Functions, there is no inferred span + 
if self.generated_span_context.is_some() { + self.inferred_span = None; + } else { + self.inferred_span = Some(inferred_span); + } } } @@ -263,11 +285,23 @@ impl SpanInferrer { rng.gen() } - /// Returns a clone of the carrier associated with the inferred span + /// Returns the extracted span context /// - #[must_use] - pub fn get_carrier(&self) -> Option> { - self.carrier.clone() + /// If the carrier is set, it will try to extract the span context, + /// otherwise it will + /// + pub fn get_span_context(&self, propagator: &impl Propagator) -> Option { + // Step Functions `SpanContext` is deterministically generated + if let Some(sc) = &self.generated_span_context { + return Some(sc.clone()); + } + + if let Some(sc) = self.carrier.as_ref().and_then(|c| propagator.extract(c)) { + debug!("Extracted trace context from inferred span"); + return Some(sc); + } + + None } /// Returns a clone of the tags associated with the inferred span diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index a5d0f4ae2..6802174b1 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -12,6 +12,7 @@ pub mod kinesis_event; pub mod s3_event; pub mod sns_event; pub mod sqs_event; +pub mod step_function_event; pub const DATADOG_CARRIER_KEY: &str = "_datadog"; pub const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; diff --git a/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs b/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs new file mode 100644 index 000000000..91eb2af54 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs @@ -0,0 +1,366 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use sha2::{Digest, Sha256}; + +use crate::{ + lifecycle::invocation::triggers::{Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, + 
traces::{ + context::{Sampling, SpanContext}, + propagation::text_map_propagator::DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY, + }, +}; + +#[allow(clippy::module_name_repetitions)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LegacyStepFunctionEvent { + #[serde(rename = "Payload")] + pub payload: StepFunctionEvent, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct StepFunctionEvent { + #[serde(rename = "Execution")] + pub execution: Execution, + #[serde(rename = "State")] + pub state: State, + #[serde(rename = "StateMachine")] + pub state_machine: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Execution { + #[serde(rename = "Id")] + id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct State { + #[serde(rename = "Name")] + name: String, + #[serde(rename = "EnteredTime")] + entered_time: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct StateMachine { + #[serde(rename = "Id")] + id: String, +} + +impl Trigger for StepFunctionEvent { + fn new(payload: serde_json::Value) -> Option + where + Self: Sized, + { + let p = payload.get("Payload").unwrap_or(&payload); + match serde_json::from_value::(p.clone()) { + Ok(event) => Some(event), + Err(e) => { + tracing::debug!("Failed to deserialize Step Function Event: {e}"); + None + } + } + } + + fn is_match(payload: &serde_json::Value) -> bool + where + Self: Sized, + { + // Check first if the payload is a Legacy Step Function event + let p = payload.get("Payload").unwrap_or(payload); + + let execution_id = p + .get("Execution") + .and_then(Value::as_object) + .and_then(|e| e.get("Id")); + let state = p.get("State").and_then(Value::as_object); + let name = state.and_then(|s| s.get("Name")); + let entered_time = state.and_then(|s| s.get("EnteredTime")); + + execution_id.is_some() && name.is_some() && entered_time.is_some() + } + + fn enrich_span(&self, _span: &mut 
datadog_trace_protobuf::pb::Span) {} + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "states".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + if let Some(sm) = &self.state_machine { + return sm.id.clone(); + } + + String::new() + } + + fn get_carrier(&self) -> HashMap { + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +impl StepFunctionEvent { + #[must_use] + pub fn get_span_context(&self) -> SpanContext { + let (lo_tid, hi_tid) = Self::generate_trace_id(self.execution.id.clone()); + let tags = HashMap::from([( + DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY.to_string(), + format!("{hi_tid:x}"), + )]); + + let parent_id = Self::generate_parent_id( + self.execution.id.clone(), + self.state.name.clone(), + self.state.entered_time.clone(), + ); + + SpanContext { + trace_id: lo_tid, + span_id: parent_id, + // Priority Auto Keep + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("states".to_string()), + tags, + links: vec![], + } + } + + /// Generates a random 64 bit ID from the formatted hash of the + /// Step Function Execution ARN, the State Name, and the State Entered Time + /// + fn generate_parent_id( + execution_id: String, + state_name: String, + state_entered_time: String, + ) -> u64 { + let unique_string = format!("{execution_id}#{state_name}#{state_entered_time}"); + + let hash = Sha256::digest(unique_string.as_bytes()); + Self::get_positive_u64(&hash[0..8]) + } + + /// Generates a random 128 bit ID from the Step Function Execution ARN + /// + fn generate_trace_id(execution_arn: String) -> (u64, u64) { + let hash = Sha256::digest(execution_arn.as_bytes()); + + let lower_order_bits = Self::get_positive_u64(&hash[8..16]); + let higher_order_bits = Self::get_positive_u64(&hash[0..8]); + + (lower_order_bits, higher_order_bits) + } + + /// Converts the first 8 bytes of a byte array to a positive `u64` + /// + fn 
get_positive_u64(hash_bytes: &[u8]) -> u64 { + let mut result: u64 = hash_bytes + .iter() + .take(8) + .fold(0, |acc, &byte| (acc << 8) + u64::from(byte)); + + // Ensure the highest bit is always 0 + result &= !(1u64 << 63); + + // Return 1 if result is 0 + if result == 0 { + 1 + } else { + result + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("step_function_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = StepFunctionEvent::new(payload).expect("Failed to deserialize into Event"); + + let expected = StepFunctionEvent { + execution: Execution { + id: String::from("arn:aws:states:us-east-1:425362996713:execution:agocsTestSF:bc9f281c-3daa-4e5a-9a60-471a3810bf44"), + }, + state: State { + name: String::from("agocsTest1"), + entered_time: String::from("2024-07-30T19:55:53.018Z"), + }, + state_machine: Some(StateMachine { + id: String::from("arn:aws:states:us-east-1:425362996713:stateMachine:agocsTestSF"), + }), + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_new_legacy_event() { + let json = read_json_file("step_function_legacy_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = StepFunctionEvent::new(payload).expect("Failed to deserialize into Event"); + + let expected = StepFunctionEvent { + execution: Execution { + id: String::from("arn:aws:states:us-east-1:425362996713:execution:agocsTestSF:bc9f281c-3daa-4e5a-9a60-471a3810bf44"), + }, + state: State { + name: String::from("agocsTest1"), + entered_time: String::from("2024-07-30T19:55:53.018Z"), + }, + state_machine: Some(StateMachine { + id: String::from("arn:aws:states:us-east-1:425362996713:stateMachine:agocsTestSF"), + }), + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = 
read_json_file("step_function_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize StepFunctionEvent"); + + assert!(StepFunctionEvent::is_match(&payload)); + } + + #[test] + fn test_is_match_legacy_event() { + let json = read_json_file("step_function_legacy_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize StepFunctionEvent"); + + assert!(StepFunctionEvent::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!StepFunctionEvent::is_match(&payload)); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("step_function_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + StepFunctionEvent::new(payload).expect("Failed to deserialize StepFunctionEvent"); + let tags = event.get_tags(); + + let expected = HashMap::from([( + "function_trigger.event_source".to_string(), + "states".to_string(), + )]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("step_function_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + StepFunctionEvent::new(payload).expect("Failed to deserialize StepFunctionEvent"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:states:us-east-1:425362996713:stateMachine:agocsTestSF" + ); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("step_function_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + StepFunctionEvent::new(payload).expect("Failed to deserialize StepFunctionEvent"); + let carrier = event.get_carrier(); + + let expected = HashMap::new(); + + assert_eq!(carrier, expected); + } + + #[test] + fn get_span_context() { + let json = 
read_json_file("step_function_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + StepFunctionEvent::new(payload).expect("Failed to deserialize StepFunctionEvent"); + + let span_context = event.get_span_context(); + + let expected = SpanContext { + trace_id: 5744042798732701615, + span_id: 2902498116043018663, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("states".to_string()), + tags: HashMap::from([( + DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY.to_string(), + "1914fe7789eb32be".to_string(), + )]), + links: vec![], + }; + + assert_eq!(span_context, expected); + } + + #[test] + fn test_generate_parent_id() { + let parent_id = StepFunctionEvent::generate_parent_id( + String::from("arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a:c8baf081-31f1-464d-971f-70cb17d01111"), + String::from("step-one"), + String::from("2022-12-08T21:08:19.224Z") + ); + + assert_eq!(parent_id, 4340734536022949921); + + let parent_id = StepFunctionEvent::generate_parent_id( + String::from("arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a:c8baf081-31f1-464d-971f-70cb17d01111"), + String::from("step-one"), + String::from("2022-12-08T21:08:19.224Y") + ); + + assert_eq!(parent_id, 981693280319792699); + } + + #[test] + fn test_generate_trace_id() { + let (lo_tid, hi_tid) = StepFunctionEvent::generate_trace_id(String::from( + "arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j", + )); + let hex_tid = format!("{:x}", hi_tid); + + assert_eq!(lo_tid, 1680583253837593461); + assert_eq!(hi_tid, 6984552746569958392); + + assert_eq!(hex_tid, "60ee1db79e4803f8"); + + let (lo_tid, hi_tid) = StepFunctionEvent::generate_trace_id( + String::from("arn:aws:states:us-east-1:425362996713:execution:agocsTestSF:bc9f281c-3daa-4e5a-9a60-471a3810bf44") + ); + let hex_tid = 
format!("{:x}", hi_tid); + + assert_eq!(lo_tid, 5744042798732701615); + assert_eq!(hi_tid, 1807349139850867390); + + assert_eq!(hex_tid, "1914fe7789eb32be"); + } +} diff --git a/bottlecap/src/metrics/enhanced/statfs.rs b/bottlecap/src/metrics/enhanced/statfs.rs index 0da8a1828..84e7412f1 100644 --- a/bottlecap/src/metrics/enhanced/statfs.rs +++ b/bottlecap/src/metrics/enhanced/statfs.rs @@ -5,6 +5,7 @@ use std::io; use std::path::Path; #[cfg(not(target_os = "windows"))] +#[allow(clippy::cast_lossless)] /// Returns the block size, total number of blocks, and number of blocks available for the specified directory path. /// pub fn statfs_info(path: &str) -> Result<(f64, f64, f64), io::Error> { diff --git a/bottlecap/src/traces/propagation/text_map_propagator.rs b/bottlecap/src/traces/propagation/text_map_propagator.rs index 34b482643..f1c5cbcf4 100644 --- a/bottlecap/src/traces/propagation/text_map_propagator.rs +++ b/bottlecap/src/traces/propagation/text_map_propagator.rs @@ -13,7 +13,8 @@ use crate::traces::propagation::{ // Datadog Keys pub const DATADOG_TRACE_ID_KEY: &str = "x-datadog-trace-id"; -const DATADOG_PARENT_ID_KEY: &str = "x-datadog-parent-id"; +pub const DATADOG_PARENT_ID_KEY: &str = "x-datadog-parent-id"; +pub const DATADOG_SPAN_ID_KEY: &str = "x-datadog-span-id"; pub const DATADOG_SAMPLING_PRIORITY_KEY: &str = "x-datadog-sampling-priority"; const DATADOG_ORIGIN_KEY: &str = "x-datadog-origin"; pub const DATADOG_TAGS_KEY: &str = "x-datadog-tags"; @@ -148,7 +149,7 @@ impl DatadogHeaderPropagator { Some(origin.to_string()) } - fn extract_tags(carrier: &dyn Extractor) -> HashMap { + pub fn extract_tags(carrier: &dyn Extractor) -> HashMap { let carrier_tags = carrier.get(DATADOG_TAGS_KEY).unwrap_or_default(); let mut tags: HashMap = HashMap::new(); diff --git a/bottlecap/tests/payloads/step_function_event.json b/bottlecap/tests/payloads/step_function_event.json new file mode 100644 index 000000000..1461c7164 --- /dev/null +++ 
b/bottlecap/tests/payloads/step_function_event.json @@ -0,0 +1,19 @@ +{ + "Execution": { + "Id": "arn:aws:states:us-east-1:425362996713:execution:agocsTestSF:bc9f281c-3daa-4e5a-9a60-471a3810bf44", + "Input": {}, + "StartTime": "2024-07-30T19:55:52.976Z", + "Name": "bc9f281c-3daa-4e5a-9a60-471a3810bf44", + "RoleArn": "arn:aws:iam::425362996713:role/test-serverless-stepfunctions-dev-AgocsTestSFRole-tRkeFXScjyk4", + "RedriveCount": 0 + }, + "StateMachine": { + "Id": "arn:aws:states:us-east-1:425362996713:stateMachine:agocsTestSF", + "Name": "agocsTestSF" + }, + "State": { + "Name": "agocsTest1", + "EnteredTime": "2024-07-30T19:55:53.018Z", + "RetryCount": 0 + } +} diff --git a/bottlecap/tests/payloads/step_function_legacy_event.json b/bottlecap/tests/payloads/step_function_legacy_event.json new file mode 100644 index 000000000..74e4c010a --- /dev/null +++ b/bottlecap/tests/payloads/step_function_legacy_event.json @@ -0,0 +1,21 @@ +{ + "Payload": { + "Execution": { + "Id": "arn:aws:states:us-east-1:425362996713:execution:agocsTestSF:bc9f281c-3daa-4e5a-9a60-471a3810bf44", + "Input": {}, + "StartTime": "2024-07-30T19:55:52.976Z", + "Name": "bc9f281c-3daa-4e5a-9a60-471a3810bf44", + "RoleArn": "arn:aws:iam::425362996713:role/test-serverless-stepfunctions-dev-AgocsTestSFRole-tRkeFXScjyk4", + "RedriveCount": 0 + }, + "StateMachine": { + "Id": "arn:aws:states:us-east-1:425362996713:stateMachine:agocsTestSF", + "Name": "agocsTestSF" + }, + "State": { + "Name": "agocsTest1", + "EnteredTime": "2024-07-30T19:55:53.018Z", + "RetryCount": 0 + } + } +} From dbf731aa1027d4cc7d01ce9c9b5b13a3d8da9c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:53:16 -0500 Subject: [PATCH 30/41] feat(bottlecap): add Lambda Function URL inferred spans (#451) * add `LambdaFunctionUrlEvent` * fmt * update span inferrer --- .../src/lifecycle/invocation/span_inferrer.rs | 11 +- 
.../triggers/api_gateway_http_event.rs | 3 + .../triggers/lambda_function_url_event.rs | 309 ++++++++++++++++++ .../src/lifecycle/invocation/triggers/mod.rs | 1 + .../payloads/lambda_function_url_event.json | 46 +++ 5 files changed, 368 insertions(+), 2 deletions(-) create mode 100644 bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs create mode 100644 bottlecap/tests/payloads/lambda_function_url_event.json diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index a4a3a5600..edcdabe55 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -13,6 +13,7 @@ use crate::lifecycle::invocation::triggers::{ dynamodb_event::DynamoDbRecord, event_bridge_event::EventBridgeEvent, kinesis_event::KinesisRecord, + lambda_function_url_event::LambdaFunctionUrlEvent, s3_event::S3Record, sns_event::{SnsEntity, SnsRecord}, sqs_event::SqsRecord, @@ -85,6 +86,12 @@ impl SpanInferrer { if let Some(t) = APIGatewayRestEvent::new(payload_value.clone()) { t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); + } + } else if LambdaFunctionUrlEvent::is_match(payload_value) { + if let Some(t) = LambdaFunctionUrlEvent::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span); + trigger = Some(Box::new(t)); } } else if SqsRecord::is_match(payload_value) { @@ -192,10 +199,10 @@ impl SpanInferrer { // Inferred a trigger if let Some(t) = trigger { let mut trigger_tags = t.get_tags(); - trigger_tags.extend([( + trigger_tags.insert( FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), t.get_arn(&aws_config.region), - )]); + ); self.trigger_tags = Some(trigger_tags); self.carrier = Some(t.get_carrier()); diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs index e07d86692..db8077257 100644 --- 
a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs @@ -253,6 +253,7 @@ mod tests { } #[test] + fn test_is_not_match() { let json = read_json_file("api_gateway_proxy_event.json"); let payload = @@ -323,6 +324,7 @@ mod tests { } #[test] + fn test_enrich_span_parameterized() { let json = read_json_file("api_gateway_http_event_parameterized.json"); let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); @@ -379,6 +381,7 @@ mod tests { ]); assert_eq!(tags, expected); } + #[test] fn test_get_arn() { let json = read_json_file("api_gateway_http_event.json"); diff --git a/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs b/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs new file mode 100644 index 000000000..087677a27 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs @@ -0,0 +1,309 @@ +use std::{collections::HashMap, env}; + +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{lowercase_key, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LambdaFunctionUrlEvent { + #[serde(serialize_with = "lowercase_key")] + pub headers: HashMap, + #[serde(rename = "requestContext")] + pub request_context: RequestContext, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RequestContext { + pub http: Http, + #[serde(rename = "accountId")] + pub account_id: String, + #[serde(rename = "domainName")] + pub domain_name: String, + #[serde(rename = "timeEpoch")] + pub time_epoch: i64, + #[serde(rename = "requestId")] + pub request_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Http { + pub method: String, + pub path: 
String, + pub protocol: String, + #[serde(rename = "sourceIp")] + pub source_ip: String, + #[serde(rename = "userAgent")] + pub user_agent: String, +} + +impl Trigger for LambdaFunctionUrlEvent { + fn new(payload: serde_json::Value) -> Option + where + Self: Sized, + { + serde_json::from_value(payload).ok()? + } + + fn is_match(payload: &serde_json::Value) -> bool + where + Self: Sized, + { + payload + .get("requestContext") + .and_then(|rc| rc.get("domainName")) + .and_then(Value::as_str) + .map_or(false, |dn| dn.contains("lambda-url")) + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span) { + let resource = format!( + "{} {}", + self.request_context.http.method, self.request_context.http.path + ); + + let http_url = format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name.clone(), + path = self.request_context.http.path.clone() + ); + + let start_time = (self.request_context.time_epoch as f64 * MS_TO_NS) as i64; + // todo: service mapping and peer service + let service_name = self.request_context.domain_name.clone(); + + span.name = String::from("aws.lambda.url"); + span.service = service_name; + span.resource = resource; + span.r#type = String::from("http"); + span.start = start_time; + span.meta.extend([ + ( + "endpoint".to_string(), + self.request_context.http.path.clone(), + ), + ("http.url".to_string(), http_url), + ( + "http.method".to_string(), + self.request_context.http.method.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.http.user_agent.clone(), + ), + ( + "http.source_ip".to_string(), + self.request_context.http.source_ip.clone(), + ), + ( + "http.protocol".to_string(), + self.request_context.http.protocol.clone(), + ), + ("operation_name".to_string(), "aws.lambda.url".to_string()), + ( + "request_id".to_string(), + self.request_context.request_id.clone(), + ), + ]); + } + + fn get_tags(&self) -> HashMap { + let mut tags = HashMap::from([ + ( + 
"http.url".to_string(), + format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name.clone(), + path = self.request_context.http.path.clone() + ), + ), + // path and URL are full + // /users/12345/profile + ( + "http.url_details.path".to_string(), + self.request_context.http.path.clone(), + ), + ( + "http.method".to_string(), + self.request_context.http.method.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.http.user_agent.clone(), + ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "lambda-function-url".to_string(), + ), + ]); + + if let Some(referer) = self.headers.get("referer") { + tags.insert("http.referer".to_string(), referer.clone()); + } + + tags + } + + fn get_arn(&self, region: &str) -> String { + let function_name = env::var("AWS_LAMBDA_FUNCTION_NAME").unwrap_or_default(); + format!( + "arn:aws:lambda:{region}:{}:url:{}", + self.request_context.account_id, function_name + ) + } + + fn get_carrier(&self) -> HashMap { + self.headers.clone() + } + + fn is_async(&self) -> bool { + self.headers + .get("x-amz-invocation-type") + .is_some_and(|v| v == "Event") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize into LambdaFunctionUrlEvent"); + + let expected = LambdaFunctionUrlEvent { + headers: HashMap::from([ + ("accept".to_string(), "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9".to_string()), + ("accept-language".to_string(), "en-US,en;q=0.9".to_string()), + ("accept-encoding".to_string(), "gzip, deflate, br".to_string()), + ("sec-fetch-mode".to_string(), 
"navigate".to_string()), + ("sec-fetch-site".to_string(), "none".to_string()), + ("sec-fetch-user".to_string(), "?1".to_string()), + ("sec-fetch-dest".to_string(), "document".to_string()), + ("sec-ch-ua".to_string(), "\"Google Chrome\";v=\"95\", \"Chromium\";v=\"95\", \";Not A Brand\";v=\"99\"".to_string()), + ("sec-ch-ua-platform".to_string(), "\"macOS\"".to_string()), + ("sec-ch-ua-mobile".to_string(), "?0".to_string()), + ("upgrade-insecure-requests".to_string(), "1".to_string()), + ( + "accept-language".to_string(), + "en-US,en;q=0.9".to_string(), + ), + ("user-agent".to_string(), "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36".to_string()), + ( + "x-amzn-trace-id".to_string(), + "Root=1-61953929-1ec00c3011062a48477b169e".to_string(), + ), + ("x-forwarded-for".to_string(), "71.195.30.42".to_string()), + ("x-forwarded-port".to_string(), "443".to_string()), + ("x-forwarded-proto".to_string(), "https".to_string()), + ("pragma".to_string(), "no-cache".to_string()), + ("cache-control".to_string(), "no-cache".to_string()), + ("host".to_string(), "a8hyhsshac.lambda-url.eu-south-1.amazonaws.com".to_string()), + + ]), + request_context: RequestContext { + request_id: String::from("ec4d58f8-2b8b-4ceb-a1d5-2be7bff58505"), + time_epoch: 1637169449721, + http: Http { + method: String::from("GET"), + path: String::from("/"), + protocol: String::from("HTTP/1.1"), + source_ip: String::from("71.195.30.42"), + user_agent: String::from("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"), + }, + account_id: String::from("601427279990"), + domain_name: String::from("a8hyhsshac.lambda-url.eu-south-1.amazonaws.com"), + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize 
LambdaFunctionUrlEvent"); + + assert!(LambdaFunctionUrlEvent::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("api_gateway_proxy_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize LambdaFunctionUrlEvent"); + assert!(!LambdaFunctionUrlEvent::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize LambdaFunctionUrlEvent"); + let mut span = Span::default(); + event.enrich_span(&mut span); + assert_eq!(span.name, "aws.lambda.url"); + assert_eq!( + span.service, + "a8hyhsshac.lambda-url.eu-south-1.amazonaws.com" + ); + assert_eq!(span.resource, "GET /"); + assert_eq!(span.r#type, "http"); + assert_eq!( + span.meta, + HashMap::from([ + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "71.195.30.42".to_string()), + ("operation_name".to_string(), "aws.lambda.url".to_string()), + ("request_id".to_string(), "ec4d58f8-2b8b-4ceb-a1d5-2be7bff58505".to_string()), + ("http.url".to_string(), "https://a8hyhsshac.lambda-url.eu-south-1.amazonaws.com/".to_string()), + ("http.method".to_string(), "GET".to_string()), + ("http.user_agent".to_string(), "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36".to_string()), + ("endpoint".to_string(), "/".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize LambdaFunctionUrlEvent"); + let tags = event.get_tags(); + let expected = HashMap::from([ + 
("function_trigger.event_source".to_string(), "lambda-function-url".to_string()), + ("http.method".to_string(), "GET".to_string()), + ("http.url_details.path".to_string(), "/".to_string()), + ("http.user_agent".to_string(), "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36".to_string()), + ("http.url".to_string(), "https://a8hyhsshac.lambda-url.eu-south-1.amazonaws.com/".to_string()), + ]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + env::set_var("AWS_LAMBDA_FUNCTION_NAME", "mock-lambda"); + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize LambdaFunctionUrlEvent"); + assert_eq!( + event.get_arn("sa-east-1"), + "arn:aws:lambda:sa-east-1:601427279990:url:mock-lambda" + ); + env::remove_var("AWS_LAMBDA_FUNCTION_NAME"); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index 6802174b1..6704a459d 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -9,6 +9,7 @@ pub mod api_gateway_rest_event; pub mod dynamodb_event; pub mod event_bridge_event; pub mod kinesis_event; +pub mod lambda_function_url_event; pub mod s3_event; pub mod sns_event; pub mod sqs_event; diff --git a/bottlecap/tests/payloads/lambda_function_url_event.json b/bottlecap/tests/payloads/lambda_function_url_event.json new file mode 100644 index 000000000..324dae524 --- /dev/null +++ b/bottlecap/tests/payloads/lambda_function_url_event.json @@ -0,0 +1,46 @@ +{ + "version": "2.0", + "routeKey": "$default", + "rawPath": "/", + "rawQueryString": "", + "headers": { + "sec-fetch-mode": "navigate", + "sec-fetch-site": "none", + "accept-language": "en-US,en;q=0.9", + "x-forwarded-proto": 
"https", + "x-forwarded-port": "443", + "x-forwarded-for": "71.195.30.42", + "sec-fetch-user": "?1", + "pragma": "no-cache", + "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + "sec-ch-ua": "\"Google Chrome\";v=\"95\", \"Chromium\";v=\"95\", \";Not A Brand\";v=\"99\"", + "sec-ch-ua-mobile": "?0", + "x-amzn-trace-id": "Root=1-61953929-1ec00c3011062a48477b169e", + "sec-ch-ua-platform": "\"macOS\"", + "host": "a8hyhsshac.lambda-url.eu-south-1.amazonaws.com", + "upgrade-insecure-requests": "1", + "cache-control": "no-cache", + "accept-encoding": "gzip, deflate, br", + "sec-fetch-dest": "document", + "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36" + }, + "requestContext": { + "accountId": "601427279990", + "apiId": "a8hyhsshac", + "domainName": "a8hyhsshac.lambda-url.eu-south-1.amazonaws.com", + "domainPrefix": "a8hyhsshac", + "http": { + "method": "GET", + "path": "/", + "protocol": "HTTP/1.1", + "sourceIp": "71.195.30.42", + "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36" + }, + "requestId": "ec4d58f8-2b8b-4ceb-a1d5-2be7bff58505", + "routeKey": "$default", + "stage": "$default", + "time": "17/Nov/2021:17:17:29 +0000", + "timeEpoch": 1637169449721 + }, + "isBase64Encoded": false +} From 5583815e505e65a2b2c7306d5052b208d67c714c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:32:48 -0500 Subject: [PATCH 31/41] feat(bottlecap): add capture lambda payload (#454) * add `tag_span_from_value` * add `capture_lambda_payload` config * add unit testing for `tag_span_from_value` * update listener `end_invocation_handler` parsing should not be handled here * add capture lambda payload feature also parse body properly, 
and handle `statusCode` --- bottlecap/src/config/mod.rs | 5 +- bottlecap/src/lifecycle/invocation/mod.rs | 243 ++++++++++++++++++ .../src/lifecycle/invocation/processor.rs | 47 +++- bottlecap/src/lifecycle/listener.rs | 32 ++- 4 files changed, 300 insertions(+), 27 deletions(-) diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 597e954f3..df2389912 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -65,6 +65,8 @@ pub struct Config { pub serverless_flush_strategy: FlushStrategy, pub enhanced_metrics: bool, pub https_proxy: Option, + pub capture_lambda_payload: bool, + pub capture_lambda_payload_max_depth: u32, // Trace Propagation #[serde(deserialize_with = "deserialize_trace_propagation_style")] pub trace_propagation_style: Vec, @@ -93,8 +95,9 @@ impl Default for Config { logs_config_processing_rules: None, // Metrics enhanced_metrics: true, - // Failover https_proxy: None, + capture_lambda_payload: false, + capture_lambda_payload_max_depth: 10, // Trace Propagation trace_propagation_style: vec![ TracePropagationStyle::Datadog, diff --git a/bottlecap/src/lifecycle/invocation/mod.rs b/bottlecap/src/lifecycle/invocation/mod.rs index 454cfa3bc..aca184c2b 100644 --- a/bottlecap/src/lifecycle/invocation/mod.rs +++ b/bottlecap/src/lifecycle/invocation/mod.rs @@ -1,13 +1,256 @@ use base64::{engine::general_purpose, DecodeError, Engine}; +use datadog_trace_protobuf::pb::Span; +use serde_json::Value; +use tracing::debug; pub mod context; pub mod processor; pub mod span_inferrer; pub mod triggers; +const MAX_TAG_CHARS: usize = 4096; +const REDACTABLE_KEYS: [&str; 8] = [ + "password", + "passwd", + "pwd", + "secret", + "token", + "authorization", + "x-authorization", + "api_key", +]; + pub fn base64_to_string(base64_string: &str) -> Result { match general_purpose::STANDARD.decode(base64_string) { Ok(bytes) => Ok(String::from_utf8_lossy(&bytes).to_string()), Err(e) => Err(e), } } + +pub fn tag_span_from_value(span: &mut 
Span, key: &str, value: &Value, depth: u32, max_depth: u32) { + // Null scenario + if value.is_null() { + span.meta.insert(key.to_string(), value.to_string()); + return; + } + + // Check max depth + if depth >= max_depth { + match serde_json::to_string(value) { + Ok(s) => { + let truncated = s.chars().take(MAX_TAG_CHARS).collect::(); + span.meta.insert(key.to_string(), truncated); + return; + } + Err(e) => { + debug!("Unable to serialize value for tagging {e}"); + return; + } + } + } + + let new_depth = depth + 1; + match value { + // Handle string case + Value::String(s) => { + if let Ok(p) = serde_json::from_str::(s) { + tag_span_from_value(span, key, &p, new_depth, max_depth); + } else { + let truncated = s.chars().take(MAX_TAG_CHARS).collect::(); + span.meta + .insert(key.to_string(), redact_value(key, truncated)); + } + } + + // Handle number case + Value::Number(n) => { + span.meta.insert(key.to_string(), n.to_string()); + } + + // Handle boolean case + Value::Bool(b) => { + span.meta.insert(key.to_string(), b.to_string()); + } + + // Handle object case + Value::Object(map) => { + for (k, v) in map { + let new_key = format!("{key}.{k}"); + tag_span_from_value(span, &new_key, v, new_depth, max_depth); + } + } + + Value::Array(a) => { + if a.is_empty() { + span.meta.insert(key.to_string(), "[]".to_string()); + return; + } + + for (i, v) in a.iter().enumerate() { + let new_key = format!("{key}.{i}"); + tag_span_from_value(span, &new_key, v, new_depth, max_depth); + } + } + Value::Null => {} + } +} + +fn redact_value(key: &str, value: String) -> String { + let split_key = key.split('.').last().unwrap_or_default(); + if REDACTABLE_KEYS.contains(&split_key) { + String::from("redacted") + } else { + value + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use serde_json::json; + + use super::*; + + #[test] + fn test_simple_tagging() { + let mut span = Span::default(); + let value = json!({ "request": { "simple": "value" } }); + + 
tag_span_from_value(&mut span, "payload", &value, 0, 10); + + let expected = HashMap::from([("payload.request.simple".to_string(), "value".to_string())]); + + assert_eq!(span.meta, expected); + } + + #[test] + fn test_complex_object() { + let mut span = Span::default(); + let value = json!({ + "request": { + "simple": "value", + "obj": { + "arr": ["a", "b", "c"], + "boolean": true, + "nested": { + "value": "nested_value" + } + }, + "empty": null, + "number": 1, + "boolean": true, + } + }); + + tag_span_from_value(&mut span, "payload", &value, 0, 10); + + let expected = HashMap::from([ + ("payload.request.simple".to_string(), "value".to_string()), + ("payload.request.obj.arr.0".to_string(), "a".to_string()), + ("payload.request.obj.arr.1".to_string(), "b".to_string()), + ("payload.request.obj.arr.2".to_string(), "c".to_string()), + ( + "payload.request.obj.boolean".to_string(), + "true".to_string(), + ), + ( + "payload.request.obj.nested.value".to_string(), + "nested_value".to_string(), + ), + ("payload.request.empty".to_string(), "null".to_string()), + ("payload.request.number".to_string(), "1".to_string()), + ("payload.request.boolean".to_string(), "true".to_string()), + ]); + + assert_eq!(span.meta, expected); + } + + #[test] + fn test_array_of_objects() { + let mut span = Span::default(); + let value = json!({ + "request": [ + { "simple": "value" }, + { "simple": "value" }, + { "simple": "value" }, + ] + }); + + tag_span_from_value(&mut span, "payload", &value, 0, 10); + + let expected = HashMap::from([ + ("payload.request.0.simple".to_string(), "value".to_string()), + ("payload.request.1.simple".to_string(), "value".to_string()), + ("payload.request.2.simple".to_string(), "value".to_string()), + ]); + + assert_eq!(span.meta, expected); + } + + #[test] + fn test_reach_max_depth() { + let mut span = Span::default(); + let value = json!({ + "hello": "world", + "empty": null, + "level1": { + "obj": { + "level3": 3 + }, + "arr": [null, true, "great", { "l3": "v3" 
}], + "boolean": true, + "number": 2, + "empty": null, + "empty_obj": {}, + "empty_arr": [] + }, + "arr": [{ "a": "b" }, { "c": "d" }] + }); + + tag_span_from_value(&mut span, "payload", &value, 0, 2); + + let expected = HashMap::from([ + ("payload.hello".to_string(), "world".to_string()), + ("payload.empty".to_string(), "null".to_string()), + ( + "payload.level1.obj".to_string(), + "{\"level3\":3}".to_string(), + ), + ( + "payload.level1.arr".to_string(), + "[null,true,\"great\",{\"l3\":\"v3\"}]".to_string(), + ), + ("payload.level1.boolean".to_string(), "true".to_string()), + ("payload.level1.number".to_string(), "2".to_string()), + ("payload.level1.empty".to_string(), "null".to_string()), + ("payload.level1.empty_obj".to_string(), "{}".to_string()), + ("payload.level1.empty_arr".to_string(), "[]".to_string()), + ("payload.arr.0".to_string(), "{\"a\":\"b\"}".to_string()), + ("payload.arr.1".to_string(), "{\"c\":\"d\"}".to_string()), + ]); + + assert_eq!(span.meta, expected); + } + + #[test] + fn test_tag_redacts_key() { + let mut span = Span::default(); + let value = json!({ + "request": { + "headers": { + "authorization": "secret token", + } + } + }); + + tag_span_from_value(&mut span, "payload", &value, 0, 10); + + let expected = HashMap::from([( + "payload.request.headers.authorization".to_string(), + "redacted".to_string(), + )]); + + assert_eq!(span.meta, expected); + } +} diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 64a8582b1..9c72f4536 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -15,7 +15,7 @@ use tracing::debug; use crate::{ config::{self, AwsConfig}, lifecycle::invocation::{ - base64_to_string, context::ContextBuffer, span_inferrer::SpanInferrer, + base64_to_string, context::ContextBuffer, span_inferrer::SpanInferrer, tag_span_from_value, }, metrics::enhanced::lambda::{EnhancedMetricData, Lambda as 
EnhancedMetrics}, proc::{self, CPUData, NetworkData}, @@ -52,7 +52,7 @@ pub struct Processor { enhanced_metrics: EnhancedMetrics, aws_config: AwsConfig, tracer_detected: bool, - enhanced_metrics_enabled: bool, + config: Arc, } impl Processor { @@ -94,7 +94,7 @@ impl Processor { enhanced_metrics: EnhancedMetrics::new(metrics_aggregator, Arc::clone(&config)), aws_config: aws_config.clone(), tracer_detected: false, - enhanced_metrics_enabled: config.enhanced_metrics, + config: Arc::clone(&config), } } @@ -102,7 +102,7 @@ impl Processor { /// pub fn on_invoke_event(&mut self, request_id: String) { self.context_buffer.create_context(request_id.clone()); - if self.enhanced_metrics_enabled { + if self.config.enhanced_metrics { // Collect offsets for network and cpu metrics let network_offset: Option = proc::get_network_data().ok(); let cpu_offset: Option = proc::get_cpu_data().ok(); @@ -293,6 +293,17 @@ impl Processor { Err(_) => json!({}), }; + // Tag the invocation span with the request payload + if self.config.capture_lambda_payload { + tag_span_from_value( + &mut self.span, + "function.request", + &payload_value, + 0, + self.config.capture_lambda_payload_max_depth, + ); + } + self.inferrer.infer_span(&payload_value, &self.aws_config); self.extracted_span_context = self.extract_span_context(&headers, &payload_value); @@ -344,22 +355,34 @@ impl Processor { /// Given trace context information, set it to the current span. 
/// - pub fn on_invocation_end( - &mut self, - headers: HashMap, - status_code: Option, - ) { - if let Some(status_code) = status_code { + pub fn on_invocation_end(&mut self, headers: HashMap, payload: Vec) { + let payload_value = match serde_json::from_slice::(&payload) { + Ok(value) => value, + Err(_) => json!({}), + }; + + // Tag the invocation span with the request payload + if self.config.capture_lambda_payload { + tag_span_from_value( + &mut self.span, + "function.response", + &payload_value, + 0, + self.config.capture_lambda_payload_max_depth, + ); + } + + if let Some(status_code) = payload_value.get("statusCode").and_then(Value::as_str) { self.span .meta - .insert("http.status_code".to_string(), status_code.clone()); + .insert("http.status_code".to_string(), status_code.to_string()); if status_code.len() == 3 && status_code.starts_with('5') { self.span.error = 1; } // If we have an inferred span, set the status code to it - self.inferrer.set_status_code(status_code); + self.inferrer.set_status_code(status_code.to_string()); } self.update_span_context_from_headers(&headers); diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs index 589815a4a..388fba133 100644 --- a/bottlecap/src/lifecycle/listener.rs +++ b/bottlecap/src/lifecycle/listener.rs @@ -141,23 +141,27 @@ impl Listener { ) -> http::Result> { debug!("Received end invocation request"); let (parts, body) = req.into_parts(); - let parsed_body = serde_json::from_slice::( - &hyper::body::to_bytes(body).await.unwrap_or_default(), - ); - let mut parsed_status_code: Option = None; - if let Some(sc) = parsed_body.unwrap_or_default().get("statusCode") { - parsed_status_code = Some(sc.to_string()); - } + match hyper::body::to_bytes(body).await { + Ok(b) => { + let body = b.to_vec(); + let mut processor = invocation_processor.lock().await; - let mut processor = invocation_processor.lock().await; + let headers = Self::headers_to_map(parts.headers); + 
processor.on_invocation_end(headers, body); + drop(processor); - let headers = Self::headers_to_map(parts.headers); - processor.on_invocation_end(headers, parsed_status_code); - drop(processor); + Response::builder() + .status(200) + .body(Body::from(json!({}).to_string())) + } + Err(e) => { + error!("Could not read end invocation request body {e}"); - Response::builder() - .status(200) - .body(Body::from(json!({}).to_string())) + Response::builder() + .status(400) + .body(Body::from("Could not read end invocation request body")) + } + } } fn hello_handler() -> http::Result> { From 3de9176dc177e2d17ac30ecf292b1d228634fd19 Mon Sep 17 00:00:00 2001 From: shreyamalpani Date: Thu, 14 Nov 2024 19:41:51 -0500 Subject: [PATCH 32/41] feat(bottlecap): generate file descriptor and threads enhanced metrics (#453) * add fd and threads enhanced metrics * clippy fixes * fixes * rename var --- bottlecap/src/lifecycle/invocation/context.rs | 2 + .../src/lifecycle/invocation/processor.rs | 8 + bottlecap/src/metrics/enhanced/constants.rs | 4 + bottlecap/src/metrics/enhanced/lambda.rs | 202 +++++++++++++ bottlecap/src/proc/constants.rs | 3 + bottlecap/src/proc/mod.rs | 269 ++++++++++++++++-- bottlecap/tests/proc/13/.gitkeep | 0 bottlecap/tests/proc/142/.gitkeep | 0 .../proc/process/invalid_malformed/31/limits | 17 ++ .../proc/process/invalid_malformed/9/limits | 17 ++ .../proc/process/invalid_missing/31/limits | 15 + .../proc/process/invalid_missing/9/limits | 15 + bottlecap/tests/proc/process/valid/31/fd/1 | 0 bottlecap/tests/proc/process/valid/31/fd/2 | 0 bottlecap/tests/proc/process/valid/31/limits | 17 ++ .../proc/process/valid/31/task/1/.gitkeep | 0 .../proc/process/valid/31/task/2/.gitkeep | 0 bottlecap/tests/proc/process/valid/31/task/3 | 0 bottlecap/tests/proc/process/valid/9/fd/1 | 0 bottlecap/tests/proc/process/valid/9/fd/2 | 0 bottlecap/tests/proc/process/valid/9/fd/3 | 0 bottlecap/tests/proc/process/valid/9/limits | 17 ++ .../proc/process/valid/9/task/1/.gitkeep | 0 
.../proc/process/valid/9/task/2/.gitkeep | 0 .../proc/process/valid/9/task/3/.gitkeep | 0 25 files changed, 566 insertions(+), 20 deletions(-) create mode 100644 bottlecap/tests/proc/13/.gitkeep create mode 100644 bottlecap/tests/proc/142/.gitkeep create mode 100644 bottlecap/tests/proc/process/invalid_malformed/31/limits create mode 100644 bottlecap/tests/proc/process/invalid_malformed/9/limits create mode 100644 bottlecap/tests/proc/process/invalid_missing/31/limits create mode 100644 bottlecap/tests/proc/process/invalid_missing/9/limits create mode 100644 bottlecap/tests/proc/process/valid/31/fd/1 create mode 100644 bottlecap/tests/proc/process/valid/31/fd/2 create mode 100644 bottlecap/tests/proc/process/valid/31/limits create mode 100644 bottlecap/tests/proc/process/valid/31/task/1/.gitkeep create mode 100644 bottlecap/tests/proc/process/valid/31/task/2/.gitkeep create mode 100644 bottlecap/tests/proc/process/valid/31/task/3 create mode 100644 bottlecap/tests/proc/process/valid/9/fd/1 create mode 100644 bottlecap/tests/proc/process/valid/9/fd/2 create mode 100644 bottlecap/tests/proc/process/valid/9/fd/3 create mode 100644 bottlecap/tests/proc/process/valid/9/limits create mode 100644 bottlecap/tests/proc/process/valid/9/task/1/.gitkeep create mode 100644 bottlecap/tests/proc/process/valid/9/task/2/.gitkeep create mode 100644 bottlecap/tests/proc/process/valid/9/task/3/.gitkeep diff --git a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs index 4cffb929c..7010e245a 100644 --- a/bottlecap/src/lifecycle/invocation/context.rs +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -320,12 +320,14 @@ mod tests { let uptime_offset = Some(50.0); let (tmp_chan_tx, _) = watch::channel(()); + let (process_chan_tx, _) = watch::channel(()); let enhanced_metric_data = Some(EnhancedMetricData { network_offset, cpu_offset, uptime_offset, tmp_chan_tx, + process_chan_tx, }); buffer.add_enhanced_metric_data(&request_id, 
enhanced_metric_data.clone()); diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 9c72f4536..a94164a18 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -112,11 +112,17 @@ impl Processor { let (tmp_chan_tx, tmp_chan_rx) = watch::channel(()); self.enhanced_metrics.set_tmp_enhanced_metrics(tmp_chan_rx); + // Start a channel for monitoring file descriptor and thread count + let (process_chan_tx, process_chan_rx) = watch::channel(()); + self.enhanced_metrics + .set_process_enhanced_metrics(process_chan_rx); + let enhanced_metric_offsets = Some(EnhancedMetricData { network_offset, cpu_offset, uptime_offset, tmp_chan_tx, + process_chan_tx, }); self.context_buffer .add_enhanced_metric_data(&request_id, enhanced_metric_offsets); @@ -196,6 +202,8 @@ impl Processor { ); // Send the signal to stop monitoring tmp _ = offsets.tmp_chan_tx.send(()); + // Send the signal to stop monitoring file descriptors and threads + _ = offsets.process_chan_tx.send(()); } } diff --git a/bottlecap/src/metrics/enhanced/constants.rs b/bottlecap/src/metrics/enhanced/constants.rs index f62fd67ea..fcd833676 100644 --- a/bottlecap/src/metrics/enhanced/constants.rs +++ b/bottlecap/src/metrics/enhanced/constants.rs @@ -38,5 +38,9 @@ pub const CPU_MIN_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_min_utiliz pub const TMP_MAX_METRIC: &str = "aws.lambda.enhanced.tmp_max"; pub const TMP_USED_METRIC: &str = "aws.lambda.enhanced.tmp_used"; pub const TMP_FREE_METRIC: &str = "aws.lambda.enhanced.tmp_free"; +pub const FD_MAX_METRIC: &str = "aws.lambda.enhanced.fd_max"; +pub const FD_USE_METRIC: &str = "aws.lambda.enhanced.fd_use"; +pub const THREADS_MAX_METRIC: &str = "aws.lambda.enhanced.threads_max"; +pub const THREADS_USE_METRIC: &str = "aws.lambda.enhanced.threads_use"; //pub const ASM_INVOCATIONS_METRIC: &str = "aws.lambda.enhanced.asm.invocations"; pub const 
ENHANCED_METRICS_ENV_VAR: &str = "DD_ENHANCED_METRICS"; diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index 6e4f53c72..3d999062f 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -429,6 +429,115 @@ impl Lambda { }); } + pub fn generate_fd_enhanced_metrics( + fd_max: f64, + fd_use: f64, + aggr: &mut std::sync::MutexGuard, + ) { + let metric = Metric::new( + constants::FD_MAX_METRIC.into(), + MetricValue::distribution(fd_max), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert fd_max metric: {}", e); + } + + // Check if fd_use value is valid before inserting metric + if fd_use > 0.0 { + let metric = Metric::new( + constants::FD_USE_METRIC.into(), + MetricValue::distribution(fd_use), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert fd_use metric: {}", e); + } + } + } + + pub fn generate_threads_enhanced_metrics( + threads_max: f64, + threads_use: f64, + aggr: &mut std::sync::MutexGuard, + ) { + let metric = Metric::new( + constants::THREADS_MAX_METRIC.into(), + MetricValue::distribution(threads_max), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert threads_max metric: {}", e); + } + + // Check if threads_use value is valid before inserting metric + if threads_use > 0.0 { + let metric = Metric::new( + constants::THREADS_USE_METRIC.into(), + MetricValue::distribution(threads_use), + None, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert threads_use metric: {}", e); + } + } + } + + pub fn set_process_enhanced_metrics(&self, mut send_metrics: Receiver<()>) { + if !self.config.enhanced_metrics { + return; + } + + let aggr = Arc::clone(&self.aggregator); + + tokio::spawn(async move { + // get list of all process ids + let pids = proc::get_pid_list(); + + // Set fd_max and initial value for fd_use to -1 + let fd_max = proc::get_fd_max_data(&pids); + let mut 
fd_use = -1_f64; + + // Set threads_max and initial value for threads_use to -1 + let threads_max = proc::get_threads_max_data(&pids); + let mut threads_use = -1_f64; + + let mut interval = interval(Duration::from_millis(1)); + loop { + tokio::select! { + biased; + // When the stop signal is received, generate final metrics + _ = send_metrics.changed() => { + let mut aggr: std::sync::MutexGuard = + aggr.lock().expect("lock poisoned"); + Self::generate_fd_enhanced_metrics(fd_max, fd_use, &mut aggr); + Self::generate_threads_enhanced_metrics(threads_max, threads_use, &mut aggr); + return; + } + // Otherwise keep monitoring file descriptor and thread usage periodically + _ = interval.tick() => { + match proc::get_fd_use_data(&pids) { + Ok(fd_use_curr) => { + fd_use = fd_use.max(fd_use_curr); + }, + Err(_) => { + debug!("Could not update file descriptor use enhanced metric."); + } + }; + match proc::get_threads_use_data(&pids) { + Ok(threads_use_curr) => { + threads_use = threads_use.max(threads_use_curr); + }, + Err(_) => { + debug!("Could not update threads use enhanced metric."); + } + }; + } + } + } + }); + } + fn calculate_estimated_cost_usd(billed_duration_ms: u64, memory_size_mb: u64) -> f64 { let gb_seconds = (billed_duration_ms as f64 * constants::MS_TO_SEC) * (memory_size_mb as f64 / constants::MB_TO_GB); @@ -503,6 +612,7 @@ pub struct EnhancedMetricData { pub cpu_offset: Option, pub uptime_offset: Option, pub tmp_chan_tx: Sender<()>, + pub process_chan_tx: Sender<()>, } impl PartialEq for EnhancedMetricData { @@ -669,6 +779,18 @@ mod tests { assert!(aggr .get_entry_by_id(constants::TMP_FREE_METRIC.into(), &None) .is_none()); + assert!(aggr + .get_entry_by_id(constants::FD_MAX_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::FD_USE_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::THREADS_MAX_METRIC.into(), &None) + .is_none()); + assert!(aggr + 
.get_entry_by_id(constants::THREADS_USE_METRIC.into(), &None) + .is_none()); } #[test] @@ -818,4 +940,84 @@ mod tests { assert_sketch(&metrics_aggr, constants::TMP_USED_METRIC, 12165120.0); assert_sketch(&metrics_aggr, constants::TMP_FREE_METRIC, 538296320.0); } + + #[test] + fn test_set_fd_enhanced_metrics_valid_fd_use() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let fd_max = 1024.0; + let fd_use = 175.0; + + Lambda::generate_fd_enhanced_metrics( + fd_max, + fd_use, + &mut lambda.aggregator.lock().expect("lock poisoned"), + ); + + assert_sketch(&metrics_aggr, constants::FD_MAX_METRIC, 1024.0); + assert_sketch(&metrics_aggr, constants::FD_USE_METRIC, 175.0); + } + + #[test] + fn test_set_fd_enhanced_metrics_invalid_fd_use() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let fd_max = 1024.0; + let fd_use = -1.0; + + Lambda::generate_fd_enhanced_metrics( + fd_max, + fd_use, + &mut lambda.aggregator.lock().expect("lock poisoned"), + ); + + assert_sketch(&metrics_aggr, constants::FD_MAX_METRIC, 1024.0); + + let aggr = lambda.aggregator.lock().expect("lock poisoned"); + assert!(aggr + .get_entry_by_id(constants::FD_USE_METRIC.into(), &None) + .is_none()); + } + + #[test] + fn test_set_threads_enhanced_metrics_valid_threads_use() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let threads_max = 1024.0; + let threads_use = 40.0; + + Lambda::generate_threads_enhanced_metrics( + threads_max, + threads_use, + &mut lambda.aggregator.lock().expect("lock poisoned"), + ); + + assert_sketch(&metrics_aggr, constants::THREADS_MAX_METRIC, 1024.0); + assert_sketch(&metrics_aggr, constants::THREADS_USE_METRIC, 40.0); + } + + #[test] + fn test_set_threads_enhanced_metrics_invalid_threads_use() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); 
+ + let threads_max = 1024.0; + let threads_use = -1.0; + + Lambda::generate_threads_enhanced_metrics( + threads_max, + threads_use, + &mut lambda.aggregator.lock().expect("lock poisoned"), + ); + + assert_sketch(&metrics_aggr, constants::THREADS_MAX_METRIC, 1024.0); + + let aggr = lambda.aggregator.lock().expect("lock poisoned"); + assert!(aggr + .get_entry_by_id(constants::THREADS_USE_METRIC.into(), &None) + .is_none()); + } } diff --git a/bottlecap/src/proc/constants.rs b/bottlecap/src/proc/constants.rs index fe06b908d..452cdf4fb 100644 --- a/bottlecap/src/proc/constants.rs +++ b/bottlecap/src/proc/constants.rs @@ -1,5 +1,8 @@ pub const PROC_NET_DEV_PATH: &str = "/proc/net/dev"; pub const PROC_STAT_PATH: &str = "/proc/stat"; pub const PROC_UPTIME_PATH: &str = "/proc/uptime"; +pub const PROC_PATH: &str = "/proc"; pub const LAMDBA_NETWORK_INTERFACE: &str = "vinternal_1"; +pub const LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT: f64 = 1024.0; +pub const LAMBDA_EXECUTION_PROCESSES_DEFAULT_LIMIT: f64 = 1024.0; diff --git a/bottlecap/src/proc/mod.rs b/bottlecap/src/proc/mod.rs index 3dfa1a67f..2aaf17474 100644 --- a/bottlecap/src/proc/mod.rs +++ b/bottlecap/src/proc/mod.rs @@ -3,11 +3,43 @@ pub mod constants; use std::{ collections::HashMap, - fs::File, + fs::{self, File}, io::{self, BufRead}, }; -use constants::{LAMDBA_NETWORK_INTERFACE, PROC_NET_DEV_PATH, PROC_STAT_PATH, PROC_UPTIME_PATH}; +use constants::{ + LAMDBA_NETWORK_INTERFACE, PROC_NET_DEV_PATH, PROC_PATH, PROC_STAT_PATH, PROC_UPTIME_PATH, +}; +use regex::Regex; +use tracing::debug; + +#[must_use] +pub fn get_pid_list() -> Vec { + get_pid_list_from_path(PROC_PATH) +} + +pub fn get_pid_list_from_path(path: &str) -> Vec { + let mut pids = Vec::::new(); + + let Ok(entries) = fs::read_dir(path) else { + debug!("Could not list /proc files"); + return pids; + }; + + pids.extend(entries.filter_map(|entry| { + entry.ok().and_then(|dir_entry| { + // Check if the entry is a directory + if dir_entry.file_type().ok()?.is_dir() 
{ + // If the directory name can be parsed as an integer, it will be added to the list + dir_entry.file_name().to_str()?.parse::().ok() + } else { + None + } + }) + })); + + pids +} #[derive(Copy, Clone, Debug, PartialEq)] pub struct NetworkData { @@ -180,12 +212,126 @@ fn get_uptime_from_path(path: &str) -> Result { )) } +#[must_use] +pub fn get_fd_max_data(pids: &[i64]) -> f64 { + get_fd_max_data_from_path(PROC_PATH, pids) +} + +fn get_fd_max_data_from_path(path: &str, pids: &[i64]) -> f64 { + let mut fd_max = constants::LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT; + // regex to capture the soft limit value (first numeric value after the title) + let re = Regex::new(r"^Max open files\s+(\d+)").expect("Failed to create regex"); + + for &pid in pids { + let limits_path = format!("{path}/{pid}/limits"); + let Ok(file) = File::open(&limits_path) else { + continue; + }; + + let reader = io::BufReader::new(file); + for line in reader.lines().map_while(Result::ok) { + if let Some(line_items) = re.captures(&line) { + if let Ok(fd_max_pid) = line_items[1].parse() { + fd_max = fd_max.min(fd_max_pid); + } else { + debug!("File descriptor max data not found in file {}", limits_path); + } + break; + } + } + } + + fd_max +} + +pub fn get_fd_use_data(pids: &[i64]) -> Result { + get_fd_use_data_from_path(PROC_PATH, pids) +} + +fn get_fd_use_data_from_path(path: &str, pids: &[i64]) -> Result { + let mut fd_use = 0; + + for &pid in pids { + let fd_path = format!("{path}/{pid}/fd"); + let Ok(files) = fs::read_dir(fd_path) else { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "File descriptor use data not found", + )); + }; + let count = files.count(); + fd_use += count; + } + + Ok(fd_use as f64) +} + +#[must_use] +pub fn get_threads_max_data(pids: &[i64]) -> f64 { + get_threads_max_data_from_path(PROC_PATH, pids) +} + +fn get_threads_max_data_from_path(path: &str, pids: &[i64]) -> f64 { + let mut threads_max = constants::LAMBDA_EXECUTION_PROCESSES_DEFAULT_LIMIT; + // regex 
to capture the soft limit value (first numeric value after the title) + let re = Regex::new(r"^Max processes\s+(\d+)").expect("Failed to create regex"); + + for &pid in pids { + let limits_path = format!("{path}/{pid}/limits"); + let Ok(file) = File::open(&limits_path) else { + continue; + }; + + let reader = io::BufReader::new(file); + for line in reader.lines().map_while(Result::ok) { + if let Some(line_items) = re.captures(&line) { + if let Ok(threads_max_pid) = line_items[1].parse() { + threads_max = threads_max.min(threads_max_pid); + } else { + debug!("Threads max data not found in file {}", limits_path); + } + break; + } + } + } + + threads_max +} + +pub fn get_threads_use_data(pids: &[i64]) -> Result { + get_threads_use_data_from_path(PROC_PATH, pids) +} + +fn get_threads_use_data_from_path(path: &str, pids: &[i64]) -> Result { + let mut threads_use = 0; + + for &pid in pids { + let task_path = format!("{path}/{pid}/task"); + let Ok(files) = fs::read_dir(task_path) else { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Threads use data not found", + )); + }; + + threads_use += files + .flatten() + .filter_map(|dir_entry| dir_entry.file_type().ok()) + .filter(fs::FileType::is_dir) + .count(); + } + + Ok(threads_use as f64) +} + #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { use super::*; use std::path::PathBuf; + const PRECISION: f64 = 1e-6; + fn path_from_root(file: &str) -> String { let mut safe_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); safe_path.push(file); @@ -193,14 +339,27 @@ mod tests { } #[test] - #[allow(clippy::float_cmp)] + fn test_get_pid_list() { + let path = "./tests/proc"; + let mut pids = get_pid_list_from_path(path); + pids.sort(); + assert_eq!(pids.len(), 2); + assert_eq!(pids[0], 13); + assert_eq!(pids[1], 142); + + let path = "./tests/incorrect_folder"; + let pids = get_pid_list_from_path(path); + assert_eq!(pids.len(), 0); + } + + #[test] fn test_get_network_data() { let path = "./tests/proc/net/valid_dev"; 
let network_data_result = get_network_data_from_path(path_from_root(path).as_str()); assert!(network_data_result.is_ok()); - let network_data_result = network_data_result.unwrap(); - assert_eq!(network_data_result.rx_bytes, 180.0); - assert_eq!(network_data_result.tx_bytes, 254.0); + let network_data = network_data_result.unwrap(); + assert!((network_data.rx_bytes - 180.0).abs() < PRECISION); + assert!((network_data.tx_bytes - 254.0).abs() < PRECISION); let path = "./tests/proc/net/invalid_dev_malformed"; let network_data_result = get_network_data_from_path(path); @@ -220,29 +379,32 @@ mod tests { } #[test] - #[allow(clippy::float_cmp)] fn test_get_cpu_data() { let path = "./tests/proc/stat/valid_stat"; let cpu_data_result = get_cpu_data_from_path(path_from_root(path).as_str()); assert!(cpu_data_result.is_ok()); let cpu_data = cpu_data_result.unwrap(); - assert_eq!(cpu_data.total_user_time_ms, 23370.0); - assert_eq!(cpu_data.total_system_time_ms, 1880.0); - assert_eq!(cpu_data.total_idle_time_ms, 178_380.0); + assert!((cpu_data.total_user_time_ms - 23370.0).abs() < PRECISION); + assert!((cpu_data.total_system_time_ms - 1880.0).abs() < PRECISION); + assert!((cpu_data.total_idle_time_ms - 178_380.0).abs() < PRECISION); assert_eq!(cpu_data.individual_cpu_idle_times.len(), 2); - assert_eq!( - *cpu_data + assert!( + (*cpu_data .individual_cpu_idle_times .get("cpu0") - .expect("cpu0 not found"), - 91880.0 + .expect("cpu0 not found") + - 91880.0) + .abs() + < PRECISION ); - assert_eq!( - *cpu_data + assert!( + (*cpu_data .individual_cpu_idle_times .get("cpu1") - .expect("cpu1 not found"), - 86490.0 + .expect("cpu1 not found") + - 86490.0) + .abs() + < PRECISION ); let path = "./tests/proc/stat/invalid_stat_non_numerical_value_1"; @@ -271,13 +433,12 @@ mod tests { } #[test] - #[allow(clippy::float_cmp)] fn test_get_uptime_data() { let path = "./tests/proc/uptime/valid_uptime"; let uptime_data_result = get_uptime_from_path(path_from_root(path).as_str()); 
assert!(uptime_data_result.is_ok()); let uptime_data = uptime_data_result.unwrap(); - assert_eq!(uptime_data, 3_213_103_123_000.0); + assert!((uptime_data - 3_213_103_123_000.0).abs() < PRECISION); let path = "./tests/proc/uptime/invalid_data_uptime"; let uptime_data_result = get_uptime_from_path(path); @@ -291,4 +452,72 @@ mod tests { let uptime_data_result = get_uptime_from_path(path); assert!(uptime_data_result.is_err()); } + + #[test] + fn test_get_fd_max_data() { + let path = "./tests/proc/process/valid"; + let pids = get_pid_list_from_path(path); + let fd_max = get_fd_max_data_from_path(path, &pids); + assert!((fd_max - 900.0).abs() < PRECISION); + + let path = "./tests/proc/process/invalid_malformed"; + let fd_max = get_fd_max_data_from_path(path, &pids); + // assert that fd_max is equal to AWS Lambda limit + assert!((fd_max - constants::LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT).abs() < PRECISION); + + let path = "./tests/proc/process/invalid_missing"; + let fd_max = get_fd_max_data_from_path(path, &pids); + // assert that fd_max is equal to AWS Lambda limit + assert!((fd_max - constants::LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT).abs() < PRECISION); + } + + #[test] + fn test_get_fd_use_data() { + let path = "./tests/proc/process/valid"; + let pids = get_pid_list_from_path(path); + let fd_use_result = get_fd_use_data_from_path(path, &pids); + assert!(fd_use_result.is_ok()); + let fd_use = fd_use_result.unwrap(); + assert!((fd_use - 5.0).abs() < PRECISION); + + let path = "./tests/proc/process/invalid_missing"; + let fd_use_result = get_fd_use_data_from_path(path, &pids); + assert!(fd_use_result.is_err()); + } + + #[test] + fn test_get_threads_max_data() { + let path = "./tests/proc/process/valid"; + let pids = get_pid_list_from_path(path); + let threads_max = get_threads_max_data_from_path(path, &pids); + assert!((threads_max - 1024.0).abs() < PRECISION); + + let path = "./tests/proc/process/invalid_malformed"; + let threads_max = 
get_threads_max_data_from_path(path, &pids); + // assert that threads_max is equal to AWS Lambda limit + assert!( + (threads_max - constants::LAMBDA_EXECUTION_PROCESSES_DEFAULT_LIMIT).abs() < PRECISION + ); + + let path = "./tests/proc/process/invalid_missing"; + let threads_max = get_threads_max_data_from_path(path, &pids); + // assert that threads_max is equal to AWS Lambda limit + assert!( + (threads_max - constants::LAMBDA_EXECUTION_PROCESSES_DEFAULT_LIMIT).abs() < PRECISION + ); + } + + #[test] + fn test_get_threads_use_data() { + let path = "./tests/proc/process/valid"; + let pids = get_pid_list_from_path(path); + let threads_use_result = get_threads_use_data_from_path(path, &pids); + assert!(threads_use_result.is_ok()); + let threads_use = threads_use_result.unwrap(); + assert!((threads_use - 5.0).abs() < PRECISION); + + let path = "./tests/proc/process/invalid_missing"; + let threads_use_result = get_threads_use_data_from_path(path, &pids); + assert!(threads_use_result.is_err()); + } } diff --git a/bottlecap/tests/proc/13/.gitkeep b/bottlecap/tests/proc/13/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/142/.gitkeep b/bottlecap/tests/proc/142/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/invalid_malformed/31/limits b/bottlecap/tests/proc/process/invalid_malformed/31/limits new file mode 100644 index 000000000..2d25ac301 --- /dev/null +++ b/bottlecap/tests/proc/process/invalid_malformed/31/limits @@ -0,0 +1,17 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max processes 1024 1024 +Max open files 1024 +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks 
+Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/invalid_malformed/9/limits b/bottlecap/tests/proc/process/invalid_malformed/9/limits new file mode 100644 index 000000000..2436ec085 --- /dev/null +++ b/bottlecap/tests/proc/process/invalid_malformed/9/limits @@ -0,0 +1,17 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max processes 1024 +Max open files 1024 1024 +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/invalid_missing/31/limits b/bottlecap/tests/proc/process/invalid_missing/31/limits new file mode 100644 index 000000000..c7dc2c55d --- /dev/null +++ b/bottlecap/tests/proc/process/invalid_missing/31/limits @@ -0,0 +1,15 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/invalid_missing/9/limits 
b/bottlecap/tests/proc/process/invalid_missing/9/limits new file mode 100644 index 000000000..07de49ec4 --- /dev/null +++ b/bottlecap/tests/proc/process/invalid_missing/9/limits @@ -0,0 +1,15 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/valid/31/fd/1 b/bottlecap/tests/proc/process/valid/31/fd/1 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/31/fd/2 b/bottlecap/tests/proc/process/valid/31/fd/2 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/31/limits b/bottlecap/tests/proc/process/valid/31/limits new file mode 100644 index 000000000..75d41eee4 --- /dev/null +++ b/bottlecap/tests/proc/process/valid/31/limits @@ -0,0 +1,17 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max processes 1024 1024 processes +Max open files 900 1024 files +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git 
a/bottlecap/tests/proc/process/valid/31/task/1/.gitkeep b/bottlecap/tests/proc/process/valid/31/task/1/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/31/task/2/.gitkeep b/bottlecap/tests/proc/process/valid/31/task/2/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/31/task/3 b/bottlecap/tests/proc/process/valid/31/task/3 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/fd/1 b/bottlecap/tests/proc/process/valid/9/fd/1 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/fd/2 b/bottlecap/tests/proc/process/valid/9/fd/2 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/fd/3 b/bottlecap/tests/proc/process/valid/9/fd/3 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/limits b/bottlecap/tests/proc/process/valid/9/limits new file mode 100644 index 000000000..664f04c88 --- /dev/null +++ b/bottlecap/tests/proc/process/valid/9/limits @@ -0,0 +1,17 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max processes 1024 1024 processes +Max open files 1024 1024 files +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/valid/9/task/1/.gitkeep b/bottlecap/tests/proc/process/valid/9/task/1/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git 
a/bottlecap/tests/proc/process/valid/9/task/2/.gitkeep b/bottlecap/tests/proc/process/valid/9/task/2/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/task/3/.gitkeep b/bottlecap/tests/proc/process/valid/9/task/3/.gitkeep new file mode 100644 index 000000000..e69de29bb From 7cb745c22018095bebc2940d853be6d3a0b16453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Thu, 14 Nov 2024 19:51:30 -0500 Subject: [PATCH 33/41] feat(bottlecap): add Cold Start Span + Tags (#450) * add some helper functions to `invocation::lifecycle` mod * create cold start span on processor * move `generate_span_id` to father module * send `platform_init_start` data to processor * send `PlatformInitStart` to main bus * update cold start `parent_id` * fix start time of cold start span * enhanced metrics now have a `dynamic_value_tags` for tags which we have to calculate at points in time * `AwsConfig` now has a `sandbox_init_time` value * add `is_empty` to `ContextBuffer` * calculate init tags on invoke also add a method to reset processor invocation state * restart init tags on set * set tags properly for proactive init * fix unit test * remove debug line * make sure `cold_start` tag is only set in one place --- bottlecap/src/bin/bottlecap/main.rs | 11 +- bottlecap/src/config/mod.rs | 2 + bottlecap/src/lifecycle/invocation/context.rs | 7 + bottlecap/src/lifecycle/invocation/mod.rs | 22 +++ .../src/lifecycle/invocation/processor.rs | 145 ++++++++++++++---- .../src/lifecycle/invocation/span_inferrer.rs | 48 +++--- bottlecap/src/logs/lambda/processor.rs | 6 +- bottlecap/src/metrics/enhanced/lambda.rs | 124 +++++++++++---- bottlecap/src/secrets/decrypt.rs | 1 + 9 files changed, 271 insertions(+), 95 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index a1c78ec50..b5c4b31e5 100644 --- a/bottlecap/src/bin/bottlecap/main.rs 
+++ b/bottlecap/src/bin/bottlecap/main.rs @@ -60,6 +60,7 @@ use std::{ path::Path, process::Command, sync::{Arc, Mutex}, + time::Instant, }; use telemetry::listener::TelemetryListenerConfig; use tokio::sync::mpsc::Sender; @@ -201,6 +202,7 @@ fn load_configs() -> (AwsConfig, Arc) { aws_secret_access_key: env::var("AWS_SECRET_ACCESS_KEY").unwrap_or_default(), aws_session_token: env::var("AWS_SESSION_TOKEN").unwrap_or_default(), function_name: env::var("AWS_LAMBDA_FUNCTION_NAME").unwrap_or_default(), + sandbox_init_time: Instant::now(), }; let lambda_directory = env::var("LAMBDA_TASK_ROOT").unwrap_or_else(|_| "/var/task".to_string()); let config = match config::get_config(Path::new(&lambda_directory)) { @@ -401,9 +403,9 @@ async fn extension_loop_active( } Event::Telemetry(event) => match event.record { - TelemetryRecord::PlatformStart { request_id, .. } => { + TelemetryRecord::PlatformInitStart { .. } => { let mut p = invocation_processor.lock().await; - p.on_platform_start(request_id, event.time); + p.on_platform_init_start(event.time); drop(p); } TelemetryRecord::PlatformInitReport { @@ -416,6 +418,11 @@ async fn extension_loop_active( p.on_platform_init_report(metrics.duration_ms); drop(p); } + TelemetryRecord::PlatformStart { request_id, .. 
} => { + let mut p = invocation_processor.lock().await; + p.on_platform_start(request_id, event.time); + drop(p); + } TelemetryRecord::PlatformRuntimeDone { request_id, status, diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index df2389912..d1cdd8e5a 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -4,6 +4,7 @@ pub mod processing_rule; pub mod trace_propagation_style; use std::path::Path; +use std::time::Instant; use std::vec; use figment::providers::{Format, Yaml}; @@ -226,6 +227,7 @@ pub struct AwsConfig { pub aws_secret_access_key: String, pub aws_session_token: String, pub function_name: String, + pub sandbox_init_time: Instant, } #[cfg(test)] diff --git a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs index 7010e245a..680a967ba 100644 --- a/bottlecap/src/lifecycle/invocation/context.rs +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -165,6 +165,13 @@ impl ContextBuffer { pub fn size(&self) -> usize { self.buffer.len() } + + /// Returns if the buffer is empty. 
+ /// + #[must_use] + pub fn is_empty(&self) -> bool { + self.buffer.is_empty() + } } #[cfg(test)] diff --git a/bottlecap/src/lifecycle/invocation/mod.rs b/bottlecap/src/lifecycle/invocation/mod.rs index aca184c2b..b62a757d9 100644 --- a/bottlecap/src/lifecycle/invocation/mod.rs +++ b/bottlecap/src/lifecycle/invocation/mod.rs @@ -1,5 +1,8 @@ use base64::{engine::general_purpose, DecodeError, Engine}; use datadog_trace_protobuf::pb::Span; +use rand::{rngs::OsRng, Rng, RngCore}; + +use crate::tags::lambda::tags::{INIT_TYPE, SNAP_START_VALUE}; use serde_json::Value; use tracing::debug; @@ -27,6 +30,25 @@ pub fn base64_to_string(base64_string: &str) -> Result { } } +fn create_empty_span(name: String, resource: String, service: String) -> Span { + Span { + name, + resource, + service, + r#type: String::from("serverless"), + ..Default::default() + } +} + +fn generate_span_id() -> u64 { + if std::env::var(INIT_TYPE).map_or(false, |it| it == SNAP_START_VALUE) { + return OsRng.next_u64(); + } + + let mut rng = rand::thread_rng(); + rng.gen() +} + pub fn tag_span_from_value(span: &mut Span, key: &str, value: &Value, depth: u32, max_depth: u32) { // Null scenario if value.is_null() { diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index a94164a18..eb9e00b6c 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -1,7 +1,7 @@ use std::{ collections::HashMap, sync::{Arc, Mutex}, - time::{SystemTime, UNIX_EPOCH}, + time::{Instant, SystemTime, UNIX_EPOCH}, }; use chrono::{DateTime, Utc}; @@ -15,7 +15,8 @@ use tracing::debug; use crate::{ config::{self, AwsConfig}, lifecycle::invocation::{ - base64_to_string, context::ContextBuffer, span_inferrer::SpanInferrer, tag_span_from_value, + base64_to_string, context::ContextBuffer, create_empty_span, generate_span_id, + span_inferrer::SpanInferrer, tag_span_from_value, }, 
metrics::enhanced::lambda::{EnhancedMetricData, Lambda as EnhancedMetrics}, proc::{self, CPUData, NetworkData}, @@ -36,6 +37,7 @@ use crate::{ pub const MS_TO_NS: f64 = 1_000_000.0; pub const S_TO_NS: f64 = 1_000_000_000.0; +pub const PROACTIVE_INITIALIZATION_THRESHOLD_MS: u64 = 10_000; pub const DATADOG_INVOCATION_ERROR_MESSAGE_KEY: &str = "x-datadog-invocation-error-msg"; pub const DATADOG_INVOCATION_ERROR_TYPE_KEY: &str = "x-datadog-invocation-error-type"; @@ -43,14 +45,23 @@ pub const DATADOG_INVOCATION_ERROR_STACK_KEY: &str = "x-datadog-invocation-error pub const DATADOG_INVOCATION_ERROR_KEY: &str = "x-datadog-invocation-error"; pub struct Processor { + // Buffer containing context of the previous 5 invocations pub context_buffer: ContextBuffer, + // Helper to infer span information inferrer: SpanInferrer, + // Current invocation span pub span: Span, + // Cold start span + cold_start_span: Option, + // Extracted span context from inferred span, headers, or payload pub extracted_span_context: Option, // Used to extract the trace context from inferred span, headers, or payload propagator: DatadogCompositePropagator, + // Helper to send enhanced metrics enhanced_metrics: EnhancedMetrics, + // AWS configuration from the Lambda environment aws_config: AwsConfig, + // Flag to determine if a tracer was detected tracer_detected: bool, config: Arc, } @@ -63,32 +74,18 @@ impl Processor { aws_config: &AwsConfig, metrics_aggregator: Arc>, ) -> Self { - let service = config.service.clone().unwrap_or("aws.lambda".to_string()); + let service = config.service.clone().unwrap_or(String::from("aws.lambda")); let resource = tags_provider .get_canonical_resource_name() - .unwrap_or("aws_lambda".to_string()); + .unwrap_or(String::from("aws.lambda")); let propagator = DatadogCompositePropagator::new(Arc::clone(&config)); Processor { context_buffer: ContextBuffer::default(), inferrer: SpanInferrer::default(), - span: Span { - service, - name: "aws.lambda".to_string(), - resource, - 
trace_id: 0, - span_id: 0, - parent_id: 0, - start: 0, - duration: 0, - error: 0, - meta: HashMap::new(), - metrics: HashMap::new(), - r#type: "serverless".to_string(), - meta_struct: HashMap::new(), - span_links: Vec::new(), - }, + span: create_empty_span(String::from("aws.lambda"), resource, service), + cold_start_span: None, extracted_span_context: None, propagator, enhanced_metrics: EnhancedMetrics::new(metrics_aggregator, Arc::clone(&config)), @@ -101,6 +98,9 @@ impl Processor { /// Given a `request_id`, creates the context and adds the enhanced metric offsets to the context buffer. /// pub fn on_invoke_event(&mut self, request_id: String) { + self.reset_state(); + self.set_init_tags(); + self.context_buffer.create_context(request_id.clone()); if self.config.enhanced_metrics { // Collect offsets for network and cpu metrics @@ -132,10 +132,87 @@ impl Processor { self.enhanced_metrics.increment_invocation_metric(); } + /// Resets the state of the processor to default values. + /// + fn reset_state(&mut self) { + // Reset Span Context on Span + self.span.trace_id = 0; + self.span.parent_id = 0; + self.span.span_id = 0; + // Error + self.span.error = 0; + // Meta tags + self.span.meta.clear(); + // Extracted Span Context + self.extracted_span_context = None; + // Cold Start Span + self.cold_start_span = None; + } + + /// On the first invocation, determine if it's a cold start or proactive init. + /// + /// For every other invocation, it will always be warm start. 
+ /// + fn set_init_tags(&mut self) { + let mut proactive_initialization = false; + let mut cold_start = false; + + // If it's empty, then we are in a cold start + if self.context_buffer.is_empty() { + let now = Instant::now(); + let time_since_sandbox_init = now.duration_since(self.aws_config.sandbox_init_time); + if time_since_sandbox_init.as_millis() > PROACTIVE_INITIALIZATION_THRESHOLD_MS.into() { + proactive_initialization = true; + } else { + cold_start = true; + } + } + + if proactive_initialization { + self.span.meta.insert( + String::from("proactive_initialization"), + proactive_initialization.to_string(), + ); + } + self.span + .meta + .insert(String::from("cold_start"), cold_start.to_string()); + + self.enhanced_metrics + .set_init_tags(proactive_initialization, cold_start); + } + + pub fn on_platform_init_start(&mut self, time: DateTime) { + // Create a cold start span + let mut cold_start_span = create_empty_span( + String::from("aws.lambda.cold_start"), + self.span.resource.clone(), + self.span.service.clone(), + ); + + let start_time: i64 = SystemTime::from(time) + .duration_since(UNIX_EPOCH) + .expect("time went backwards") + .as_nanos() + .try_into() + .unwrap_or_default(); + + cold_start_span.span_id = generate_span_id(); + cold_start_span.start = start_time; + + self.cold_start_span = Some(cold_start_span); + } + /// Given the duration of the platform init report, set the init duration metric. /// + #[allow(clippy::cast_possible_truncation)] pub fn on_platform_init_report(&mut self, duration_ms: f64) { self.enhanced_metrics.set_init_duration_metric(duration_ms); + + if let Some(cold_start_span) = &mut self.cold_start_span { + // `round` is intentionally meant to be a whole integer + cold_start_span.duration = (duration_ms * MS_TO_NS) as i64; + } } /// Given a `request_id` and the time of the platform start, add the start time to the context buffer. 
@@ -183,10 +260,10 @@ impl Processor { } if let Some(context) = self.context_buffer.get(request_id) { - let span = &mut self.span; // `round` is intentionally meant to be a whole integer - span.duration = (context.runtime_duration_ms * MS_TO_NS).round() as i64; - span.meta + self.span.duration = (context.runtime_duration_ms * MS_TO_NS).round() as i64; + self.span + .meta .insert("request_id".to_string(), request_id.clone()); // todo(duncanista): add missing tags // - cold start, proactive init @@ -213,6 +290,11 @@ impl Processor { self.inferrer.complete_inferred_spans(&self.span); + if let Some(cold_start_span) = &mut self.cold_start_span { + cold_start_span.trace_id = self.span.trace_id; + cold_start_span.parent_id = self.span.parent_id; + } + if self.tracer_detected { let mut body_size = std::mem::size_of_val(&self.span); let mut traces = vec![self.span.clone()]; @@ -227,6 +309,11 @@ impl Processor { traces.push(ws.clone()); } + if let Some(cold_start_span) = &self.cold_start_span { + body_size += std::mem::size_of_val(cold_start_span); + traces.push(cold_start_span.clone()); + } + // todo: figure out what to do here let header_tags = tracer_header_tags::TracerHeaderTags { lang: "", @@ -263,7 +350,7 @@ impl Processor { // Set the report log metrics self.enhanced_metrics.set_report_log_metrics(&metrics); - if let Some(context) = self.context_buffer.remove(request_id) { + if let Some(context) = self.context_buffer.get(request_id) { if context.runtime_duration_ms != 0.0 { let post_runtime_duration_ms = metrics.duration_ms - context.runtime_duration_ms; @@ -273,7 +360,7 @@ impl Processor { } // Set Network and CPU time metrics - if let Some(offsets) = context.enhanced_metric_data { + if let Some(offsets) = context.enhanced_metric_data.clone() { self.enhanced_metrics .set_network_enhanced_metrics(offsets.network_offset); self.enhanced_metrics @@ -288,14 +375,6 @@ impl Processor { pub fn on_invocation_start(&mut self, headers: HashMap, payload: Vec) { 
self.tracer_detected = true; - // Reset trace context - self.span.trace_id = 0; - self.span.parent_id = 0; - self.span.span_id = 0; - self.span.error = 0; - self.span.meta.clear(); - self.extracted_span_context = None; - let payload_value = match serde_json::from_slice::(&payload) { Ok(value) => value, Err(_) => json!({}), diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index edcdabe55..f68134f46 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -1,26 +1,27 @@ use std::collections::HashMap; use datadog_trace_protobuf::pb::Span; -use rand::{rngs::OsRng, Rng, RngCore}; use serde_json::Value; use tracing::debug; use crate::config::AwsConfig; -use crate::lifecycle::invocation::triggers::{ - api_gateway_http_event::APIGatewayHttpEvent, - api_gateway_rest_event::APIGatewayRestEvent, - dynamodb_event::DynamoDbRecord, - event_bridge_event::EventBridgeEvent, - kinesis_event::KinesisRecord, - lambda_function_url_event::LambdaFunctionUrlEvent, - s3_event::S3Record, - sns_event::{SnsEntity, SnsRecord}, - sqs_event::SqsRecord, - step_function_event::StepFunctionEvent, - Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, +use crate::lifecycle::invocation::{ + generate_span_id, + triggers::{ + api_gateway_http_event::APIGatewayHttpEvent, + api_gateway_rest_event::APIGatewayRestEvent, + dynamodb_event::DynamoDbRecord, + event_bridge_event::EventBridgeEvent, + kinesis_event::KinesisRecord, + lambda_function_url_event::LambdaFunctionUrlEvent, + s3_event::S3Record, + sns_event::{SnsEntity, SnsRecord}, + sqs_event::SqsRecord, + step_function_event::StepFunctionEvent, + Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, + }, }; -use crate::tags::lambda::tags::{INIT_TYPE, SNAP_START_VALUE}; use crate::traces::{context::SpanContext, propagation::Propagator}; pub struct SpanInferrer { @@ -72,7 +73,7 @@ impl SpanInferrer { let mut trigger: Option> = 
None; let mut inferred_span = Span { - span_id: Self::generate_span_id(), + span_id: generate_span_id(), ..Default::default() }; @@ -102,7 +103,7 @@ impl SpanInferrer { if let Ok(sns_entity) = serde_json::from_str::(&t.body) { debug!("Found an SNS event wrapped in the SQS body"); let mut wrapped_inferred_span = Span { - span_id: Self::generate_span_id(), + span_id: generate_span_id(), ..Default::default() }; @@ -121,7 +122,7 @@ impl SpanInferrer { serde_json::from_str::(&t.body) { let mut wrapped_inferred_span = Span { - span_id: Self::generate_span_id(), + span_id: generate_span_id(), ..Default::default() }; @@ -145,7 +146,7 @@ impl SpanInferrer { serde_json::from_str::(message) { let mut wrapped_inferred_span = Span { - span_id: Self::generate_span_id(), + span_id: generate_span_id(), ..Default::default() }; @@ -283,16 +284,7 @@ impl SpanInferrer { } } - fn generate_span_id() -> u64 { - if std::env::var(INIT_TYPE).map_or(false, |it| it == SNAP_START_VALUE) { - return OsRng.next_u64(); - } - - let mut rng = rand::thread_rng(); - rng.gen() - } - - /// Returns the extracted span context + /// Returns a clone of the carrier associated with the inferred span /// /// If the carrier is set, it will try to extract the span context, /// otherwise it will diff --git a/bottlecap/src/logs/lambda/processor.rs b/bottlecap/src/logs/lambda/processor.rs index 6de124be6..d80db8912 100644 --- a/bottlecap/src/logs/lambda/processor.rs +++ b/bottlecap/src/logs/lambda/processor.rs @@ -87,8 +87,13 @@ impl LambdaProcessor { runtime_version_arn, .. 
// TODO: check if we could do something with this metrics: `initialization_type` and `phase` } => { + if let Err(e) = self.event_bus.send(Event::Telemetry(copy)).await { + error!("Failed to send PlatformInitStart to the main event bus: {}", e); + } + let rv = runtime_version.unwrap_or("?".to_string()); // TODO: check what does containers display let rv_arn = runtime_version_arn.unwrap_or("?".to_string()); // TODO: check what do containers display + Ok(Message::new( format!("INIT_START Runtime Version: {rv} Runtime Version ARN: {rv_arn}"), None, @@ -178,7 +183,6 @@ impl LambdaProcessor { )) }, // TODO: PlatformInitRuntimeDone - // TODO: PlatformInitReport // TODO: PlatformExtension // TODO: PlatformTelemetrySubscription // TODO: PlatformLogsDropped diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index 3d999062f..a0b02aef7 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -1,10 +1,13 @@ -use super::constants::{self, BASE_LAMBDA_INVOCATION_PRICE}; -use super::statfs::statfs_info; +use crate::metrics::enhanced::{ + constants::{self, BASE_LAMBDA_INVOCATION_PRICE}, + statfs::statfs_info, +}; use crate::proc::{self, CPUData, NetworkData}; use crate::telemetry::events::ReportMetrics; -use dogstatsd::aggregator::Aggregator; use dogstatsd::metric; use dogstatsd::metric::{Metric, MetricValue}; +use dogstatsd::{aggregator::Aggregator, metric::SortedTags}; +use std::collections::HashMap; use std::env::consts::ARCH; use std::sync::{Arc, Mutex}; use std::time::Duration; @@ -18,12 +21,47 @@ use tracing::error; pub struct Lambda { pub aggregator: Arc>, pub config: Arc, + // Dynamic value tags are the ones we cannot obtain statically from the sandbox + dynamic_value_tags: HashMap, } impl Lambda { #[must_use] pub fn new(aggregator: Arc>, config: Arc) -> Lambda { - Lambda { aggregator, config } + Lambda { + aggregator, + config, + dynamic_value_tags: HashMap::new(), + } + } + + /// 
Set the dynamic value tags that are not available at compile time + pub fn set_init_tags(&mut self, proactive_initialization: bool, cold_start: bool) { + self.dynamic_value_tags.remove("cold_start"); + self.dynamic_value_tags.remove("proactive_initialization"); + + self.dynamic_value_tags + .insert(String::from("cold_start"), cold_start.to_string()); + + // Only set `proactive_initialization` tag if it is true + if proactive_initialization { + self.dynamic_value_tags.insert( + String::from("proactive_initialization"), + String::from("true"), + ); + } + } + + fn get_dynamic_value_tags(&self) -> Option { + let vec_tags: Vec = self + .dynamic_value_tags + .iter() + .map(|(k, v)| format!("{k}:{v}")) + .collect(); + + let string_tags = vec_tags.join(","); + + SortedTags::parse(&string_tags).ok() } pub fn increment_invocation_metric(&self) { @@ -45,7 +83,7 @@ impl Lambda { let metric = Metric::new( constants::INIT_DURATION_METRIC.into(), MetricValue::distribution(init_duration_ms * constants::MS_TO_SEC), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = self @@ -62,7 +100,11 @@ impl Lambda { if !self.config.enhanced_metrics { return; } - let metric = Metric::new(metric_name.into(), MetricValue::distribution(1f64), None); + let metric = Metric::new( + metric_name.into(), + MetricValue::distribution(1f64), + self.get_dynamic_value_tags(), + ); if let Err(e) = self .aggregator .lock() @@ -81,7 +123,7 @@ impl Lambda { constants::RUNTIME_DURATION_METRIC.into(), MetricValue::distribution(duration_ms), // Datadog expects this value as milliseconds, not seconds - None, + self.get_dynamic_value_tags(), ); if let Err(e) = self .aggregator @@ -101,7 +143,7 @@ impl Lambda { constants::POST_RUNTIME_DURATION_METRIC.into(), MetricValue::distribution(duration_ms), // Datadog expects this value as milliseconds, not seconds - None, + self.get_dynamic_value_tags(), ); if let Err(e) = self .aggregator @@ -113,10 +155,11 @@ impl Lambda { } } - pub(crate) fn 
generate_network_enhanced_metrics( + pub fn generate_network_enhanced_metrics( network_data_offset: NetworkData, network_data_end: NetworkData, aggr: &mut std::sync::MutexGuard, + tags: Option, ) { let rx_bytes = network_data_end.rx_bytes - network_data_offset.rx_bytes; let tx_bytes = network_data_end.tx_bytes - network_data_offset.tx_bytes; @@ -125,7 +168,7 @@ impl Lambda { let metric = Metric::new( constants::RX_BYTES_METRIC.into(), MetricValue::distribution(rx_bytes), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert rx_bytes metric: {}", e); @@ -134,7 +177,7 @@ impl Lambda { let metric = Metric::new( constants::TX_BYTES_METRIC.into(), MetricValue::distribution(tx_bytes), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert tx_bytes metric: {}", e); @@ -143,7 +186,7 @@ impl Lambda { let metric = Metric::new( constants::TOTAL_NETWORK_METRIC.into(), MetricValue::distribution(total_network), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert total_network metric: {}", e); @@ -161,7 +204,12 @@ impl Lambda { match proc::get_network_data() { Ok(data) => { - Self::generate_network_enhanced_metrics(offset, data, &mut aggr); + Self::generate_network_enhanced_metrics( + offset, + data, + &mut aggr, + self.get_dynamic_value_tags(), + ); } Err(_e) => { debug!("Could not find data to generate network enhanced metrics"); @@ -176,6 +224,7 @@ impl Lambda { cpu_data_offset: &CPUData, cpu_data_end: &CPUData, aggr: &mut std::sync::MutexGuard, + tags: Option, ) { let cpu_user_time = cpu_data_end.total_user_time_ms - cpu_data_offset.total_user_time_ms; let cpu_system_time = @@ -185,7 +234,7 @@ impl Lambda { let metric = Metric::new( constants::CPU_USER_TIME_METRIC.into(), MetricValue::distribution(cpu_user_time), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert cpu_user_time metric: {}", e); @@ -194,7 +243,7 @@ impl Lambda { let 
metric = Metric::new( constants::CPU_SYSTEM_TIME_METRIC.into(), MetricValue::distribution(cpu_system_time), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert cpu_system_time metric: {}", e); @@ -203,7 +252,7 @@ impl Lambda { let metric = Metric::new( constants::CPU_TOTAL_TIME_METRIC.into(), MetricValue::distribution(cpu_total_time), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert cpu_total_time metric: {}", e); @@ -221,7 +270,12 @@ impl Lambda { let cpu_data = proc::get_cpu_data(); match (cpu_offset, cpu_data) { (Some(cpu_offset), Ok(cpu_data)) => { - Self::generate_cpu_time_enhanced_metrics(&cpu_offset, &cpu_data, &mut aggr); + Self::generate_cpu_time_enhanced_metrics( + &cpu_offset, + &cpu_data, + &mut aggr, + self.get_dynamic_value_tags(), + ); } (_, _) => { debug!("Could not find data to generate cpu time enhanced metrics"); @@ -235,6 +289,7 @@ impl Lambda { uptime_data_offset: f64, uptime_data_end: f64, aggr: &mut std::sync::MutexGuard, + tags: Option, ) { let num_cores = cpu_data_end.individual_cpu_idle_times.len() as f64; let uptime = uptime_data_end - uptime_data_offset; @@ -276,7 +331,7 @@ impl Lambda { let metric = Metric::new( constants::CPU_TOTAL_UTILIZATION_PCT_METRIC.into(), MetricValue::distribution(cpu_total_utilization_pct), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert cpu_total_utilization_pct metric: {}", e); @@ -285,7 +340,7 @@ impl Lambda { let metric = Metric::new( constants::CPU_TOTAL_UTILIZATION_METRIC.into(), MetricValue::distribution(cpu_total_utilization), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert cpu_total_utilization metric: {}", e); @@ -294,7 +349,7 @@ impl Lambda { let metric = Metric::new( constants::NUM_CORES_METRIC.into(), MetricValue::distribution(num_cores), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert num_cores 
metric: {}", e); @@ -303,7 +358,7 @@ impl Lambda { let metric = Metric::new( constants::CPU_MAX_UTILIZATION_METRIC.into(), MetricValue::distribution(cpu_max_utilization), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert cpu_max_utilization metric: {}", e); @@ -312,7 +367,7 @@ impl Lambda { let metric = Metric::new( constants::CPU_MIN_UTILIZATION_METRIC.into(), MetricValue::distribution(cpu_min_utilization), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert cpu_min_utilization metric: {}", e); @@ -341,6 +396,7 @@ impl Lambda { uptime_offset, uptime_data, &mut aggr, + self.get_dynamic_value_tags(), ); } (_, _, _, _) => { @@ -353,11 +409,12 @@ impl Lambda { tmp_max: f64, tmp_used: f64, aggr: &mut std::sync::MutexGuard, + tags: Option, ) { let metric = Metric::new( constants::TMP_MAX_METRIC.into(), MetricValue::distribution(tmp_max), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert tmp_max metric: {}", e); @@ -366,7 +423,7 @@ impl Lambda { let metric = Metric::new( constants::TMP_USED_METRIC.into(), MetricValue::distribution(tmp_used), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert tmp_used metric: {}", e); @@ -376,7 +433,7 @@ impl Lambda { let metric = Metric::new( constants::TMP_FREE_METRIC.into(), MetricValue::distribution(tmp_free), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert tmp_free metric: {}", e); @@ -389,6 +446,7 @@ impl Lambda { } let aggr = Arc::clone(&self.aggregator); + let tags = self.get_dynamic_value_tags(); tokio::spawn(async move { // Set tmp_max and initial value for tmp_used @@ -410,7 +468,7 @@ impl Lambda { _ = send_metrics.changed() => { let mut aggr: std::sync::MutexGuard = aggr.lock().expect("lock poisoned"); - Self::generate_tmp_enhanced_metrics(tmp_max, tmp_used, &mut aggr); + Self::generate_tmp_enhanced_metrics(tmp_max, tmp_used, &mut 
aggr, tags); return; } // Otherwise keep monitoring tmp usage periodically @@ -563,7 +621,7 @@ impl Lambda { let metric = metric::Metric::new( constants::DURATION_METRIC.into(), MetricValue::distribution(metrics.duration_ms * constants::MS_TO_SEC), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert duration metric: {}", e); @@ -571,7 +629,7 @@ impl Lambda { let metric = metric::Metric::new( constants::BILLED_DURATION_METRIC.into(), MetricValue::distribution(metrics.billed_duration_ms as f64 * constants::MS_TO_SEC), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert billed duration metric: {}", e); @@ -579,7 +637,7 @@ impl Lambda { let metric = metric::Metric::new( constants::MAX_MEMORY_USED_METRIC.into(), MetricValue::distribution(metrics.max_memory_used_mb as f64), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert max memory used metric: {}", e); @@ -587,7 +645,7 @@ impl Lambda { let metric = metric::Metric::new( constants::MEMORY_SIZE_METRIC.into(), MetricValue::distribution(metrics.memory_size_mb as f64), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert memory size metric: {}", e); @@ -598,7 +656,7 @@ impl Lambda { let metric = metric::Metric::new( constants::ESTIMATED_COST_METRIC.into(), MetricValue::distribution(cost_usd), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert estimated cost metric: {}", e); @@ -832,6 +890,7 @@ mod tests { network_offset, network_data, &mut lambda.aggregator.lock().expect("lock poisoned"), + None, ); assert_sketch(&metrics_aggr, constants::RX_BYTES_METRIC, 20000.0); @@ -868,6 +927,7 @@ mod tests { &cpu_offset, &cpu_data, &mut lambda.aggregator.lock().expect("lock poisoned"), + None, ); assert_sketch(&metrics_aggr, constants::CPU_USER_TIME_METRIC, 
100.0); @@ -908,6 +968,7 @@ mod tests { uptime_offset, uptime_data, &mut lambda.aggregator.lock().expect("lock poisoned"), + None, ); // the differences above and metric values below are from an invocation using the go agent to verify the calculations @@ -934,6 +995,7 @@ mod tests { tmp_max, tmp_used, &mut lambda.aggregator.lock().expect("lock poisoned"), + None, ); assert_sketch(&metrics_aggr, constants::TMP_MAX_METRIC, 550461440.0); diff --git a/bottlecap/src/secrets/decrypt.rs b/bottlecap/src/secrets/decrypt.rs index a29e6a39d..615e35284 100644 --- a/bottlecap/src/secrets/decrypt.rs +++ b/bottlecap/src/secrets/decrypt.rs @@ -241,6 +241,7 @@ mod tests { aws_secret_access_key: "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY".to_string(), aws_session_token: "AQoDYXdzEJr...".to_string(), function_name: "arn:some-function".to_string(), + sandbox_init_time: Instant::now(), }, RequestArgs { service: "secretsmanager".to_string(), From 6ea2674546b6d586e05101f42fd12b9e0ab5a148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 15 Nov 2024 12:48:07 -0500 Subject: [PATCH 34/41] feat(bottlecap): support service mapping and `peer.service` tag (#455) * add some helper functions to `invocation::lifecycle` mod * create cold start span on processor * move `generate_span_id` to father module * send `platform_init_start` data to processor * send `PlatformInitStart` to main bus * update cold start `parent_id` * fix start time of cold start span * enhanced metrics now have a `dynamic_value_tags` for tags which we have to calculate at points in time * `AwsConfig` now has a `sandbox_init_time` value * add `is_empty` to `ContextBuffer` * calculate init tags on invoke also add a method to reset processor invocation state * restart init tags on set * set tags properly for proactive init * fix unit test * remove debug line * make sure `cold_start` tag is only set in one place * add service mapping config serializer * add 
`service_mapping.rs` * add `ServiceNameResolver` interface for service mapping * implement interface in every trigger * send `service_mapping` lookup table to span enricher * create `SpanInferrer` with `service_mapping` config * fmt --- bottlecap/src/config/mod.rs | 6 ++ bottlecap/src/config/service_mapping.rs | 35 ++++++++++ .../src/lifecycle/invocation/processor.rs | 5 +- .../src/lifecycle/invocation/span_inferrer.rs | 44 ++++++------ .../triggers/api_gateway_http_event.rs | 60 ++++++++++++++-- .../triggers/api_gateway_rest_event.rs | 61 ++++++++++++++-- .../invocation/triggers/dynamodb_event.rs | 55 +++++++++++++-- .../invocation/triggers/event_bridge_event.rs | 67 +++++++++++++++--- .../invocation/triggers/kinesis_event.rs | 70 +++++++++++++++---- .../triggers/lambda_function_url_event.rs | 51 ++++++++++++-- .../src/lifecycle/invocation/triggers/mod.rs | 28 +++++++- .../lifecycle/invocation/triggers/s3_event.rs | 48 +++++++++++-- .../invocation/triggers/sns_event.rs | 68 ++++++++++++++---- .../invocation/triggers/sqs_event.rs | 58 +++++++++++---- .../triggers/step_function_event.rs | 21 +++++- 15 files changed, 570 insertions(+), 107 deletions(-) create mode 100644 bottlecap/src/config/service_mapping.rs diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index d1cdd8e5a..54feab25b 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -1,8 +1,10 @@ pub mod flush_strategy; pub mod log_level; pub mod processing_rule; +pub mod service_mapping; pub mod trace_propagation_style; +use std::collections::HashMap; use std::path::Path; use std::time::Instant; use std::vec; @@ -15,6 +17,7 @@ use trace_propagation_style::{deserialize_trace_propagation_style, TracePropagat use crate::config::flush_strategy::FlushStrategy; use crate::config::log_level::{deserialize_log_level, LogLevel}; use crate::config::processing_rule::{deserialize_processing_rules, ProcessingRule}; +use crate::config::service_mapping::deserialize_service_mapping; 
/// `FailoverConfig` is a struct that represents fields that are not supported in the extension yet. /// @@ -68,6 +71,8 @@ pub struct Config { pub https_proxy: Option, pub capture_lambda_payload: bool, pub capture_lambda_payload_max_depth: u32, + #[serde(deserialize_with = "deserialize_service_mapping")] + pub service_mapping: HashMap, // Trace Propagation #[serde(deserialize_with = "deserialize_trace_propagation_style")] pub trace_propagation_style: Vec, @@ -99,6 +104,7 @@ impl Default for Config { https_proxy: None, capture_lambda_payload: false, capture_lambda_payload_max_depth: 10, + service_mapping: HashMap::new(), // Trace Propagation trace_propagation_style: vec![ TracePropagationStyle::Datadog, diff --git a/bottlecap/src/config/service_mapping.rs b/bottlecap/src/config/service_mapping.rs new file mode 100644 index 000000000..4deda11fd --- /dev/null +++ b/bottlecap/src/config/service_mapping.rs @@ -0,0 +1,35 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Deserializer}; +use tracing::debug; + +#[allow(clippy::module_name_repetitions)] +pub fn deserialize_service_mapping<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = String::deserialize(deserializer)?; + + let map = s + .split(',') + .map(|pair| { + let mut split = pair.split(':'); + + let service = split.next(); + let to_map = split.next(); + + if let (Some(service), Some(to_map)) = (service, to_map) { + Ok((service.trim().to_string(), to_map.trim().to_string())) + } else { + debug!("Ignoring invalid service mapping pair: {pair}"); + Err(serde::de::Error::custom(format!( + "Failed to deserialize service mapping for pair: {pair}" + ))) + } + }) + .collect(); + + map +} diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index eb9e00b6c..b8bd3e40b 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -83,7 +83,7 @@ 
impl Processor { Processor { context_buffer: ContextBuffer::default(), - inferrer: SpanInferrer::default(), + inferrer: SpanInferrer::new(config.service_mapping.clone()), span: create_empty_span(String::from("aws.lambda"), resource, service), cold_start_span: None, extracted_span_context: None, @@ -266,10 +266,7 @@ impl Processor { .meta .insert("request_id".to_string(), request_id.clone()); // todo(duncanista): add missing tags - // - cold start, proactive init // - language - // - function.request - capture lambda payload - // - function.response // - metrics tags (for asm) if let Some(offsets) = &context.enhanced_metric_data { diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index f68134f46..3391bc689 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -24,7 +24,9 @@ use crate::lifecycle::invocation::{ }; use crate::traces::{context::SpanContext, propagation::Propagator}; +#[derive(Default)] pub struct SpanInferrer { + service_mapping: HashMap, // Span inferred from the Lambda incoming request payload pub inferred_span: Option, // Nested span inferred from the Lambda incoming request payload @@ -39,16 +41,11 @@ pub struct SpanInferrer { trigger_tags: Option>, } -impl Default for SpanInferrer { - fn default() -> Self { - Self::new() - } -} - impl SpanInferrer { #[must_use] - pub fn new() -> Self { + pub fn new(service_mapping: HashMap) -> Self { Self { + service_mapping, inferred_span: None, wrapped_inferred_span: None, is_async_span: false, @@ -79,25 +76,25 @@ impl SpanInferrer { if APIGatewayHttpEvent::is_match(payload_value) { if let Some(t) = APIGatewayHttpEvent::new(payload_value.clone()) { - t.enrich_span(&mut inferred_span); + t.enrich_span(&mut inferred_span, &self.service_mapping); trigger = Some(Box::new(t)); } } else if APIGatewayRestEvent::is_match(payload_value) { if let Some(t) = 
APIGatewayRestEvent::new(payload_value.clone()) { - t.enrich_span(&mut inferred_span); + t.enrich_span(&mut inferred_span, &self.service_mapping); trigger = Some(Box::new(t)); } } else if LambdaFunctionUrlEvent::is_match(payload_value) { if let Some(t) = LambdaFunctionUrlEvent::new(payload_value.clone()) { - t.enrich_span(&mut inferred_span); + t.enrich_span(&mut inferred_span, &self.service_mapping); trigger = Some(Box::new(t)); } } else if SqsRecord::is_match(payload_value) { if let Some(t) = SqsRecord::new(payload_value.clone()) { - t.enrich_span(&mut inferred_span); + t.enrich_span(&mut inferred_span, &self.service_mapping); // Check for SNS event wrapped in the SQS body if let Ok(sns_entity) = serde_json::from_str::(&t.body) { @@ -111,7 +108,7 @@ impl SpanInferrer { sns: sns_entity, event_subscription_arn: None, }; - wt.enrich_span(&mut wrapped_inferred_span); + wt.enrich_span(&mut wrapped_inferred_span, &self.service_mapping); inferred_span.meta.extend(wt.get_tags()); wrapped_inferred_span.duration = @@ -126,7 +123,8 @@ impl SpanInferrer { ..Default::default() }; - event_bridge_entity.enrich_span(&mut wrapped_inferred_span); + event_bridge_entity + .enrich_span(&mut wrapped_inferred_span, &self.service_mapping); inferred_span.meta.extend(event_bridge_entity.get_tags()); wrapped_inferred_span.duration = @@ -139,7 +137,7 @@ impl SpanInferrer { } } else if SnsRecord::is_match(payload_value) { if let Some(t) = SnsRecord::new(payload_value.clone()) { - t.enrich_span(&mut inferred_span); + t.enrich_span(&mut inferred_span, &self.service_mapping); if let Some(message) = &t.sns.message { if let Ok(event_bridge_wrapper_message) = @@ -150,7 +148,8 @@ impl SpanInferrer { ..Default::default() }; - event_bridge_wrapper_message.enrich_span(&mut wrapped_inferred_span); + event_bridge_wrapper_message + .enrich_span(&mut wrapped_inferred_span, &self.service_mapping); inferred_span .meta .extend(event_bridge_wrapper_message.get_tags()); @@ -166,25 +165,25 @@ impl SpanInferrer 
{ } } else if DynamoDbRecord::is_match(payload_value) { if let Some(t) = DynamoDbRecord::new(payload_value.clone()) { - t.enrich_span(&mut inferred_span); + t.enrich_span(&mut inferred_span, &self.service_mapping); trigger = Some(Box::new(t)); } } else if S3Record::is_match(payload_value) { if let Some(t) = S3Record::new(payload_value.clone()) { - t.enrich_span(&mut inferred_span); + t.enrich_span(&mut inferred_span, &self.service_mapping); trigger = Some(Box::new(t)); } } else if EventBridgeEvent::is_match(payload_value) { if let Some(t) = EventBridgeEvent::new(payload_value.clone()) { - t.enrich_span(&mut inferred_span); + t.enrich_span(&mut inferred_span, &self.service_mapping); trigger = Some(Box::new(t)); } } else if KinesisRecord::is_match(payload_value) { if let Some(t) = KinesisRecord::new(payload_value.clone()) { - t.enrich_span(&mut inferred_span); + t.enrich_span(&mut inferred_span, &self.service_mapping); trigger = Some(Box::new(t)); } @@ -240,7 +239,6 @@ impl SpanInferrer { } // TODO: add status tag and other info from response - // TODO: add peer.service pub fn complete_inferred_spans(&mut self, invocation_span: &Span) { if let Some(s) = &mut self.inferred_span { if let Some(ws) = &mut self.wrapped_inferred_span { @@ -262,6 +260,8 @@ impl SpanInferrer { // Set error ws.error = invocation_span.error; + ws.meta + .insert(String::from("peer.service"), s.service.clone()); ws.trace_id = invocation_span.trace_id; } @@ -279,6 +279,10 @@ impl SpanInferrer { // Set error s.error = invocation_span.error; + s.meta.insert( + String::from("peer.service"), + invocation_span.service.clone(), + ); s.trace_id = invocation_span.trace_id; } diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs index db8077257..cdf372001 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs +++ 
b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs @@ -7,7 +7,8 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::MS_TO_NS, triggers::{ - get_aws_partition_by_region, lowercase_key, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + get_aws_partition_by_region, lowercase_key, ServiceNameResolver, Trigger, + FUNCTION_TRIGGER_EVENT_SOURCE_TAG, }, }; @@ -63,7 +64,7 @@ impl Trigger for APIGatewayHttpEvent { } #[allow(clippy::cast_possible_truncation)] - fn enrich_span(&self, span: &mut Span) { + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { debug!("Enriching an Inferred Span for an API Gateway HTTP Event"); let resource = if self.route_key.is_empty() { format!( @@ -81,8 +82,9 @@ impl Trigger for APIGatewayHttpEvent { path = self.request_context.http.path ); let start_time = (self.request_context.time_epoch as f64 * MS_TO_NS) as i64; - // todo: service mapping - let service_name = self.request_context.domain_name.clone(); + + let service_name = + self.resolve_service_name(service_mapping, &self.request_context.domain_name); span.name = "aws.httpapi".to_string(); span.service = service_name; @@ -191,6 +193,15 @@ impl Trigger for APIGatewayHttpEvent { } } +impl ServiceNameResolver for APIGatewayHttpEvent { + fn get_specific_identifier(&self) -> String { + self.request_context.api_id.clone() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_api_gateway" + } +} #[cfg(test)] mod tests { use super::*; @@ -268,7 +279,8 @@ mod tests { let event = APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.httpapi"); assert_eq!( span.service, @@ -331,7 +343,8 @@ mod tests { let event = APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); let mut span = 
Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.httpapi"); assert_eq!( span.service, @@ -393,4 +406,39 @@ mod tests { "arn:aws:apigateway:sa-east-1::/restapis/x02yirxc7a/stages/$default" ); } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("x02yirxc7a".to_string(), "specific-service".to_string()), + ( + "lambda_api_gateway".to_string(), + "generic-service".to_string(), + ), + ]); + + assert_eq!( + event.resolve_service_name( + &specific_service_mapping, + &event.request_context.domain_name + ), + "specific-service" + ); + + let generic_service_mapping = HashMap::from([( + "lambda_api_gateway".to_string(), + "generic-service".to_string(), + )]); + assert_eq!( + event + .resolve_service_name(&generic_service_mapping, &event.request_context.domain_name), + "generic-service" + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs index e8fc443dd..67a1180be 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs @@ -7,7 +7,8 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::MS_TO_NS, triggers::{ - get_aws_partition_by_region, lowercase_key, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + get_aws_partition_by_region, lowercase_key, ServiceNameResolver, Trigger, + FUNCTION_TRIGGER_EVENT_SOURCE_TAG, }, }; @@ -66,7 +67,7 @@ impl Trigger for APIGatewayRestEvent { } 
#[allow(clippy::cast_possible_truncation)] - fn enrich_span(&self, span: &mut Span) { + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { debug!("Enriching an Inferred Span for an API Gateway REST Event"); let resource = format!( "{http_method} {path}", @@ -79,8 +80,9 @@ impl Trigger for APIGatewayRestEvent { path = self.request_context.path ); let start_time = (self.request_context.time_epoch as f64 * MS_TO_NS) as i64; - // todo: service mapping - let service_name = self.request_context.domain_name.clone(); + + let service_name = + self.resolve_service_name(service_mapping, &self.request_context.domain_name); span.name = "aws.apigateway".to_string(); span.service = service_name; @@ -179,6 +181,16 @@ impl Trigger for APIGatewayRestEvent { } } +impl ServiceNameResolver for APIGatewayRestEvent { + fn get_specific_identifier(&self) -> String { + self.request_context.api_id.clone() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_api_gateway" + } +} + #[cfg(test)] mod tests { use super::*; @@ -240,7 +252,8 @@ mod tests { let event = APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.apigateway"); assert_eq!(span.service, "id.execute-api.us-east-1.amazonaws.com"); assert_eq!(span.resource, "GET /path"); @@ -298,7 +311,8 @@ mod tests { let event = APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.apigateway"); assert_eq!( span.service, @@ -368,4 +382,39 @@ mod tests { "arn:aws:apigateway:us-east-1::/restapis/id/stages/$default" ); } + + #[test] + fn test_resolve_service_name() { + let json = 
read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("id".to_string(), "specific-service".to_string()), + ( + "lambda_api_gateway".to_string(), + "generic-service".to_string(), + ), + ]); + + assert_eq!( + event.resolve_service_name( + &specific_service_mapping, + &event.request_context.domain_name + ), + "specific-service" + ); + + let generic_service_mapping = HashMap::from([( + "lambda_api_gateway".to_string(), + "generic-service".to_string(), + )]); + assert_eq!( + event + .resolve_service_name(&generic_service_mapping, &event.request_context.domain_name), + "generic-service" + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs b/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs index 026e74832..8503f46c5 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs @@ -6,7 +6,7 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::S_TO_NS, - triggers::{Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, + triggers::{ServiceNameResolver, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, }; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -74,14 +74,14 @@ impl Trigger for DynamoDbRecord { } #[allow(clippy::cast_possible_truncation)] - fn enrich_span(&self, span: &mut Span) { + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { debug!("Enriching an Inferred Span for a DynamoDB event"); - let table_name = self.event_source_arn.split('/').nth(1).unwrap_or_default(); + let table_name = self.get_specific_identifier(); let resource = format!("{} {}", self.event_name.clone(), table_name); let start_time = 
(self.dynamodb.approximate_creation_date_time * S_TO_NS) as i64; - // todo: service mapping and peer service - let service_name = "dynamodb"; + + let service_name = self.resolve_service_name(service_mapping, "dynamodb"); span.name = String::from("aws.dynamodb"); span.service = service_name.to_string(); @@ -129,6 +129,20 @@ impl Trigger for DynamoDbRecord { } } +impl ServiceNameResolver for DynamoDbRecord { + fn get_specific_identifier(&self) -> String { + self.event_source_arn + .split('/') + .nth(1) + .unwrap_or_default() + .to_string() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_dynamodb" + } +} + #[cfg(test)] mod tests { use super::*; @@ -176,7 +190,8 @@ mod tests { let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.dynamodb"); assert_eq!(span.service, "dynamodb"); assert_eq!(span.resource, "INSERT ExampleTableWithStream"); @@ -237,4 +252,32 @@ mod tests { assert_eq!(carrier, expected); } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ( + "ExampleTableWithStream".to_string(), + "specific-service".to_string(), + ), + ("lambda_dynamodb".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "dynamodb"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_dynamodb".to_string(), "generic-service".to_string())]); + 
assert_eq!( + event.resolve_service_name(&generic_service_mapping, "dynamodb"), + "generic-service" + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs index ff7d174c6..f9b1e17b1 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs @@ -7,7 +7,9 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::{MS_TO_NS, S_TO_NS}, - triggers::{Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, + triggers::{ + ServiceNameResolver, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, }; const DATADOG_START_TIME_KEY: &str = "x-datadog-start-time"; @@ -49,7 +51,7 @@ impl Trigger for EventBridgeEvent { } #[allow(clippy::cast_possible_truncation)] - fn enrich_span(&self, span: &mut Span) { + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { // EventBridge events have a timestamp resolution in seconds let start_time_seconds = self .time @@ -57,17 +59,13 @@ impl Trigger for EventBridgeEvent { .unwrap_or((self.time.timestamp_millis() as f64 * S_TO_NS) as i64); let carrier = self.get_carrier(); - let resource_name = carrier - .get(DATADOG_RESOURCE_NAME_KEY) - .unwrap_or(&self.source) - .clone(); + let resource_name = self.get_specific_identifier(); let start_time = carrier .get(DATADOG_START_TIME_KEY) .and_then(|s| s.parse::().ok()) .map_or(start_time_seconds, |s| (s * MS_TO_NS) as i64); - // todo: service mapping and peer service - let service_name = "eventbridge"; + let service_name = self.resolve_service_name(service_mapping, "eventbridge"); span.name = String::from("aws.eventbridge"); span.service = service_name.to_string(); @@ -105,6 +103,20 @@ impl Trigger for EventBridgeEvent { } } +impl ServiceNameResolver for EventBridgeEvent { + fn get_specific_identifier(&self) -> String { + let carrier = self.get_carrier(); + 
carrier + .get(DATADOG_RESOURCE_NAME_KEY) + .unwrap_or(&self.source) + .to_string() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_eventbridge" + } +} + #[cfg(test)] mod tests { use super::*; @@ -168,7 +180,8 @@ mod tests { EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); let expected = serde_json::from_str(&read_json_file("eventbridge_span.json")) .expect("Failed to deserialize into Span"); @@ -183,7 +196,8 @@ mod tests { EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.resource, "my.event"); } @@ -196,7 +210,8 @@ mod tests { EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.resource, "testBus"); // Seconds resolution @@ -239,4 +254,34 @@ mod tests { assert_eq!(carrier, expected); } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = EventBridgeEvent::new(payload).expect("Failed to deserialize EventBridgeEvent"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("testBus".to_string(), "specific-service".to_string()), + ( + "lambda_eventbridge".to_string(), + "generic-service".to_string(), + ), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "eventbridge"), + "specific-service" + ); + + let generic_service_mapping = 
HashMap::from([( + "lambda_eventbridge".to_string(), + "generic-service".to_string(), + )]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "eventbridge"), + "generic-service" + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs index c735d5439..ae55add0c 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs @@ -9,7 +9,9 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::S_TO_NS, - triggers::{Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, + triggers::{ + ServiceNameResolver, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, }; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -69,20 +71,24 @@ impl Trigger for KinesisRecord { } #[allow(clippy::cast_possible_truncation)] - fn enrich_span(&self, span: &mut Span) { - let event_source_arn = &self.event_source_arn; - let parsed_stream_name = event_source_arn.split('/').last().unwrap_or_default(); - let parsed_shard_id = self.event_id.split(':').next().unwrap_or_default(); - span.name = "aws.kinesis".to_string(); - span.service = "kinesis".to_string(); + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { + let stream_name = self.get_specific_identifier(); + let shard_id = self.event_id.split(':').next().unwrap_or_default(); + let service_name = self.resolve_service_name(service_mapping, "kinesis"); + + span.name = String::from("aws.kinesis"); + span.service = service_name; span.start = (self.kinesis.approximate_arrival_timestamp * S_TO_NS) as i64; - span.resource = parsed_stream_name.to_string(); + span.resource.clone_from(&stream_name); span.r#type = "web".to_string(); span.meta = HashMap::from([ ("operation_name".to_string(), "aws.kinesis".to_string()), - ("stream_name".to_string(), parsed_stream_name.to_string()), - 
("shard_id".to_string(), parsed_shard_id.to_string()), - ("event_source_arn".to_string(), event_source_arn.to_string()), + ("stream_name".to_string(), stream_name.to_string()), + ("shard_id".to_string(), shard_id.to_string()), + ( + "event_source_arn".to_string(), + self.event_source_arn.to_string(), + ), ("event_id".to_string(), self.event_id.to_string()), ("event_name".to_string(), self.event_name.to_string()), ("event_version".to_string(), self.event_version.to_string()), @@ -120,6 +126,20 @@ impl Trigger for KinesisRecord { } } +impl ServiceNameResolver for KinesisRecord { + fn get_specific_identifier(&self) -> String { + self.event_source_arn + .split('/') + .last() + .unwrap_or_default() + .to_string() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_kinesis" + } +} + #[cfg(test)] mod tests { use super::*; @@ -170,7 +190,8 @@ mod tests { let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); let event = KinesisRecord::new(payload).expect("Failed to deserialize S3Record"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.kinesis"); assert_eq!(span.service, "kinesis"); assert_eq!(span.resource, "kinesisStream"); @@ -245,4 +266,29 @@ mod tests { assert_eq!(carrier, expected); } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize KinesisRecord"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("kinesisStream".to_string(), "specific-service".to_string()), + ("lambda_kinesis".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "kinesis"), + "specific-service" + ); + 
+ let generic_service_mapping = + HashMap::from([("lambda_kinesis".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "kinesis"), + "generic-service" + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs b/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs index 087677a27..14a2eaa32 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs @@ -6,7 +6,7 @@ use serde_json::Value; use crate::lifecycle::invocation::{ processor::MS_TO_NS, - triggers::{lowercase_key, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, + triggers::{lowercase_key, ServiceNameResolver, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, }; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -28,6 +28,8 @@ pub struct RequestContext { pub time_epoch: i64, #[serde(rename = "requestId")] pub request_id: String, + #[serde(rename = "apiId")] + pub api_id: String, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] @@ -61,7 +63,7 @@ impl Trigger for LambdaFunctionUrlEvent { } #[allow(clippy::cast_possible_truncation)] - fn enrich_span(&self, span: &mut Span) { + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { let resource = format!( "{} {}", self.request_context.http.method, self.request_context.http.path @@ -74,8 +76,9 @@ impl Trigger for LambdaFunctionUrlEvent { ); let start_time = (self.request_context.time_epoch as f64 * MS_TO_NS) as i64; - // todo: service mapping and peer service - let service_name = self.request_context.domain_name.clone(); + + let service_name = + self.resolve_service_name(service_mapping, &self.request_context.domain_name); span.name = String::from("aws.lambda.url"); span.service = service_name; @@ -168,6 +171,16 @@ impl Trigger for LambdaFunctionUrlEvent { } } +impl ServiceNameResolver for LambdaFunctionUrlEvent { 
+ fn get_specific_identifier(&self) -> String { + self.request_context.api_id.clone() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_url" + } +} + #[cfg(test)] mod tests { use super::*; @@ -222,6 +235,7 @@ mod tests { }, account_id: String::from("601427279990"), domain_name: String::from("a8hyhsshac.lambda-url.eu-south-1.amazonaws.com"), + api_id: String::from("a8hyhsshac"), }, }; @@ -252,7 +266,8 @@ mod tests { let event = LambdaFunctionUrlEvent::new(payload) .expect("Failed to deserialize LambdaFunctionUrlEvent"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.lambda.url"); assert_eq!( span.service, @@ -306,4 +321,30 @@ mod tests { ); env::remove_var("AWS_LAMBDA_FUNCTION_NAME"); } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize LambdaFunctionUrlEvent"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("a8hyhsshac".to_string(), "specific-service".to_string()), + ("lambda_url".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "domain-name"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_url".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "domain-name"), + "generic-service" + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index 6704a459d..2f9a0100a 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -19,18 
+19,42 @@ pub const DATADOG_CARRIER_KEY: &str = "_datadog"; pub const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; pub const FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG: &str = "function_trigger.event_source_arn"; -pub trait Trigger { +/// Resolves the service name for a given trigger depending on +/// service mapping configuration. +pub trait ServiceNameResolver { + /// Get the specific service name for this trigger type, it will + /// be used as a key to resolve the service name + fn get_specific_identifier(&self) -> String; + + /// Get the generic service mapping key for the trigger + fn get_generic_identifier(&self) -> &'static str; +} + +pub trait Trigger: ServiceNameResolver { fn new(payload: Value) -> Option where Self: Sized; fn is_match(payload: &Value) -> bool where Self: Sized; - fn enrich_span(&self, span: &mut Span); + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap); fn get_tags(&self) -> HashMap; fn get_arn(&self, region: &str) -> String; fn get_carrier(&self) -> HashMap; fn is_async(&self) -> bool; + + /// Default implementation for service name resolution + fn resolve_service_name( + &self, + service_mapping: &HashMap, + fallback: &str, + ) -> String { + service_mapping + .get(&self.get_specific_identifier()) + .or_else(|| service_mapping.get(self.get_generic_identifier())) + .unwrap_or(&fallback.to_string()) + .to_string() + } } #[must_use] diff --git a/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs b/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs index 1e7fe5beb..d45dc1f50 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs @@ -8,7 +8,7 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::MS_TO_NS, - triggers::{Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, + triggers::{ServiceNameResolver, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, }; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] 
@@ -77,15 +77,15 @@ impl Trigger for S3Record { } #[allow(clippy::cast_possible_truncation)] - fn enrich_span(&self, span: &mut Span) { + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { debug!("Enriching an InferredSpan span with S3 event"); - let bucket_name = self.s3.bucket.name.clone(); + let bucket_name = self.get_specific_identifier(); let start_time = self .event_time .timestamp_nanos_opt() .unwrap_or((self.event_time.timestamp_millis() as f64 * MS_TO_NS) as i64); - // todo: service mapping - let service_name = "s3"; + + let service_name = self.resolve_service_name(service_mapping, "s3"); span.name = String::from("aws.s3"); span.service = service_name.to_string(); @@ -123,6 +123,16 @@ impl Trigger for S3Record { } } +impl ServiceNameResolver for S3Record { + fn get_specific_identifier(&self) -> String { + self.s3.bucket.name.clone() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_s3" + } +} + #[cfg(test)] mod tests { use super::*; @@ -177,7 +187,8 @@ mod tests { let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); let event = S3Record::new(payload).expect("Failed to deserialize S3Record"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.s3"); assert_eq!(span.service, "s3"); assert_eq!(span.resource, "example-bucket"); @@ -237,4 +248,29 @@ mod tests { assert_eq!(carrier, expected); } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize S3Record"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("example-bucket".to_string(), "specific-service".to_string()), + ("lambda_s3".to_string(), "generic-service".to_string()), + 
]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "s3"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_s3".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "s3"), + "generic-service" + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs index 2b7514cf1..47091a9d6 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use chrono::{DateTime, Utc}; +use datadog_trace_protobuf::pb::Span; use serde::{Deserialize, Serialize}; use serde_json::Value; use tracing::debug; @@ -9,7 +10,7 @@ use crate::lifecycle::invocation::{ base64_to_string, processor::MS_TO_NS, triggers::{ - event_bridge_event::EventBridgeEvent, Trigger, DATADOG_CARRIER_KEY, + event_bridge_event::EventBridgeEvent, ServiceNameResolver, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, }, }; @@ -82,33 +83,26 @@ impl Trigger for SnsRecord { } #[allow(clippy::cast_possible_truncation)] - fn enrich_span(&self, span: &mut datadog_trace_protobuf::pb::Span) { + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { debug!("Enriching an Inferred Span for an SNS Event"); - let resource = self - .sns - .topic_arn - .clone() - .split(':') - .last() - .unwrap_or_default() - .to_string(); + let resource_name = self.get_specific_identifier(); let start_time = self .sns .timestamp .timestamp_nanos_opt() .unwrap_or((self.sns.timestamp.timestamp_millis() as f64 * MS_TO_NS) as i64); - // todo: service mapping - let service_name = "sns".to_string(); + + let service_name = self.resolve_service_name(service_mapping, "sns"); span.name = "aws.sns".to_string(); span.service = service_name.to_string(); - span.resource.clone_from(&resource); + 
span.resource.clone_from(&resource_name); span.r#type = "web".to_string(); span.start = start_time; span.meta.extend([ ("operation_name".to_string(), "aws.sns".to_string()), - ("topicname".to_string(), resource), + ("topicname".to_string(), resource_name), ("topic_arn".to_string(), self.sns.topic_arn.clone()), ("message_id".to_string(), self.sns.message_id.clone()), ("type".to_string(), self.sns.r#type.clone()), @@ -164,6 +158,21 @@ impl Trigger for SnsRecord { } } +impl ServiceNameResolver for SnsRecord { + fn get_specific_identifier(&self) -> String { + self.sns + .topic_arn + .split(':') + .last() + .unwrap_or_default() + .to_string() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_sns" + } +} + #[cfg(test)] mod tests { use datadog_trace_protobuf::pb::Span; @@ -224,7 +233,8 @@ mod tests { let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.sns"); assert_eq!(span.service, "sns"); assert_eq!(span.resource, "serverlessTracingTopicPy"); @@ -341,4 +351,32 @@ mod tests { assert_eq!(carrier, expected); } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ( + "serverlessTracingTopicPy".to_string(), + "specific-service".to_string(), + ), + ("lambda_sns".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "sns"), + "specific-service" + ); + + let generic_service_mapping = + 
HashMap::from([("lambda_sns".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "sns"), + "generic-service" + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs index 6e748d4d9..c9766a736 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -10,7 +10,7 @@ use crate::lifecycle::invocation::{ event_bridge_event::EventBridgeEvent, get_aws_partition_by_region, sns_event::{SnsEntity, SnsRecord}, - Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + ServiceNameResolver, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, }, }; @@ -98,23 +98,17 @@ impl Trigger for SqsRecord { } #[allow(clippy::cast_possible_truncation)] - fn enrich_span(&self, span: &mut Span) { + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { debug!("Enriching an Inferred Span for an SQS Event"); - let resource = self - .event_source_arn - .clone() - .split(':') - .last() - .unwrap_or_default() - .to_string(); + let resource = self.get_specific_identifier(); let start_time = (self .attributes .sent_timestamp .parse::() .unwrap_or_default() as f64 * MS_TO_NS) as i64; - // todo: service mapping - let service_name = "sqs"; + + let service_name = self.resolve_service_name(service_mapping, "sqs"); span.name = "aws.sqs".to_string(); span.service = service_name.to_string(); @@ -198,6 +192,20 @@ impl Trigger for SqsRecord { } } +impl ServiceNameResolver for SqsRecord { + fn get_specific_identifier(&self) -> String { + self.event_source_arn + .split(':') + .last() + .unwrap_or_default() + .to_string() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_sqs" + } +} + #[cfg(test)] mod tests { use super::*; @@ -260,7 +268,8 @@ mod tests { let payload = serde_json::from_str(&json).expect("Failed to deserialize into 
Value"); let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); let mut span = Span::default(); - event.enrich_span(&mut span); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); assert_eq!(span.name, "aws.sqs"); assert_eq!(span.service, "sqs"); assert_eq!(span.resource, "MyQueue"); @@ -391,4 +400,29 @@ mod tests { assert_eq!(carrier, expected); } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("MyQueue".to_string(), "specific-service".to_string()), + ("lambda_sqs".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "sqs"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_sqs".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "sqs"), + "generic-service" + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs b/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs index 91eb2af54..ee77434bc 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs @@ -5,7 +5,9 @@ use serde_json::Value; use sha2::{Digest, Sha256}; use crate::{ - lifecycle::invocation::triggers::{Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, + lifecycle::invocation::triggers::{ + ServiceNameResolver, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, traces::{ context::{Sampling, SpanContext}, propagation::text_map_propagator::DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY, @@ -82,7 +84,12 @@ impl Trigger for StepFunctionEvent { 
execution_id.is_some() && name.is_some() && entered_time.is_some() } - fn enrich_span(&self, _span: &mut datadog_trace_protobuf::pb::Span) {} + fn enrich_span( + &self, + _span: &mut datadog_trace_protobuf::pb::Span, + _service_mapping: &HashMap, + ) { + } fn get_tags(&self) -> HashMap { HashMap::from([( @@ -182,6 +189,16 @@ impl StepFunctionEvent { } } +impl ServiceNameResolver for StepFunctionEvent { + fn get_specific_identifier(&self) -> String { + String::new() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_stepfunction" + } +} + #[cfg(test)] mod tests { use super::*; From b52e73818ecd91165ef28e2cee91abd050d1188d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 15 Nov 2024 12:59:03 -0500 Subject: [PATCH 35/41] add missing tags to new metrics (#456) --- bottlecap/src/metrics/enhanced/lambda.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index a0b02aef7..af5996ff6 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -491,11 +491,12 @@ impl Lambda { fd_max: f64, fd_use: f64, aggr: &mut std::sync::MutexGuard, + tags: Option, ) { let metric = Metric::new( constants::FD_MAX_METRIC.into(), MetricValue::distribution(fd_max), - None, + tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert fd_max metric: {}", e); @@ -506,7 +507,7 @@ impl Lambda { let metric = Metric::new( constants::FD_USE_METRIC.into(), MetricValue::distribution(fd_use), - None, + tags, ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert fd_use metric: {}", e); @@ -518,11 +519,12 @@ impl Lambda { threads_max: f64, threads_use: f64, aggr: &mut std::sync::MutexGuard, + tags: Option, ) { let metric = Metric::new( constants::THREADS_MAX_METRIC.into(), MetricValue::distribution(threads_max), - None, 
+ tags.clone(), ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert threads_max metric: {}", e); @@ -533,7 +535,7 @@ impl Lambda { let metric = Metric::new( constants::THREADS_USE_METRIC.into(), MetricValue::distribution(threads_use), - None, + tags, ); if let Err(e) = aggr.insert(metric) { error!("Failed to insert threads_use metric: {}", e); @@ -547,6 +549,7 @@ impl Lambda { } let aggr = Arc::clone(&self.aggregator); + let tags = self.get_dynamic_value_tags(); tokio::spawn(async move { // get list of all process ids @@ -568,8 +571,8 @@ impl Lambda { _ = send_metrics.changed() => { let mut aggr: std::sync::MutexGuard = aggr.lock().expect("lock poisoned"); - Self::generate_fd_enhanced_metrics(fd_max, fd_use, &mut aggr); - Self::generate_threads_enhanced_metrics(threads_max, threads_use, &mut aggr); + Self::generate_fd_enhanced_metrics(fd_max, fd_use, &mut aggr, tags.clone()); + Self::generate_threads_enhanced_metrics(threads_max, threads_use, &mut aggr, tags); return; } // Otherwise keep monitoring file descriptor and thread usage periodically @@ -1015,6 +1018,7 @@ mod tests { fd_max, fd_use, &mut lambda.aggregator.lock().expect("lock poisoned"), + None, ); assert_sketch(&metrics_aggr, constants::FD_MAX_METRIC, 1024.0); @@ -1033,6 +1037,7 @@ mod tests { fd_max, fd_use, &mut lambda.aggregator.lock().expect("lock poisoned"), + None, ); assert_sketch(&metrics_aggr, constants::FD_MAX_METRIC, 1024.0); @@ -1055,6 +1060,7 @@ mod tests { threads_max, threads_use, &mut lambda.aggregator.lock().expect("lock poisoned"), + None, ); assert_sketch(&metrics_aggr, constants::THREADS_MAX_METRIC, 1024.0); @@ -1073,6 +1079,7 @@ mod tests { threads_max, threads_use, &mut lambda.aggregator.lock().expect("lock poisoned"), + None, ); assert_sketch(&metrics_aggr, constants::THREADS_MAX_METRIC, 1024.0); From 9d5f78726afb5627ef17c36bfae887342dc0979d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 15 
Nov 2024 15:31:19 -0500 Subject: [PATCH 36/41] update numbers, sadly (#457) --- .gitlab/scripts/check_layer_size.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/scripts/check_layer_size.sh b/.gitlab/scripts/check_layer_size.sh index 573d3884a..c53615cb6 100755 --- a/.gitlab/scripts/check_layer_size.sh +++ b/.gitlab/scripts/check_layer_size.sh @@ -14,8 +14,8 @@ if [ -z "$LAYER_FILE" ]; then exit 1 fi -MAX_LAYER_COMPRESSED_SIZE_KB=$(expr 19 \* 1024) # 19 MB, amd64 is 19, while arm64 is 15 -MAX_LAYER_UNCOMPRESSED_SIZE_KB=$(expr 49 \* 1024) # 49 MB, amd is 49, while arm64 is 48 +MAX_LAYER_COMPRESSED_SIZE_KB=$(expr 20 \* 1024) # 20 MB, amd64 is 19, while arm64 is 18 +MAX_LAYER_UNCOMPRESSED_SIZE_KB=$(expr 50 \* 1024) # 50 MB, amd is 50, while arm64 is 47 LAYERS_DIR=".layers" From f2baa389455b32022cd21aa9e582481508db3150 Mon Sep 17 00:00:00 2001 From: alexgallotta <5581237+alexgallotta@users.noreply.github.com> Date: Fri, 15 Nov 2024 16:42:14 -0500 Subject: [PATCH 37/41] feat(bottlecap): add aws trace header for java and sqs (#452) * add aws trace header for java and sqs * fix priority sampling * remove clippy warnings * fix: do not skip inferred spans with aws headers * make clippy happy * add comment for 64 bits trace id * fix clippy warnings * fix import and tests * format * remove dead code --- bottlecap/src/lifecycle/invocation/context.rs | 46 ++++---- .../src/lifecycle/invocation/span_inferrer.rs | 17 ++- .../triggers/lambda_function_url_event.rs | 2 +- .../lifecycle/invocation/triggers/s3_event.rs | 1 + .../invocation/triggers/sns_event.rs | 1 + .../invocation/triggers/sqs_event.rs | 105 +++++++++++++++++- .../triggers/step_function_event.rs | 20 ++-- bottlecap/src/metrics/enhanced/lambda.rs | 11 +- bottlecap/src/proc/mod.rs | 48 ++++---- bottlecap/src/traces/propagation/mod.rs | 48 +++++--- .../traces/propagation/text_map_propagator.rs | 1 + .../tests/payloads/eventbridge_sqs_event.json | 1 - 
.../eventbridge_sqs_java_header_event.json | 21 ++++ 13 files changed, 232 insertions(+), 90 deletions(-) create mode 100644 bottlecap/tests/payloads/eventbridge_sqs_java_header_event.json diff --git a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs index 680a967ba..97e6bbf14 100644 --- a/bottlecap/src/lifecycle/invocation/context.rs +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -96,7 +96,7 @@ impl ContextBuffer { /// Creates a new `Context` and adds it to the buffer. /// pub fn create_context(&mut self, request_id: String) { - self.insert(Context::new(request_id, 0.0, 0.0, 0, None)); + self.insert(Context::new(request_id, 0f64, 0f64, 0, None)); } /// Adds the init duration to a `Context` in the buffer. @@ -188,20 +188,20 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); let request_id_2 = String::from("2"); - let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0, None); + let context = Context::new(request_id_2.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 2); assert_eq!(buffer.get(&request_id_2).unwrap(), &context); // This should replace the first context let request_id_3 = String::from("3"); - let context = Context::new(request_id_3.clone(), 0.0, 0.0, 0, None); + let context = Context::new(request_id_3.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 2); assert_eq!(buffer.get(&request_id_3).unwrap(), &context); @@ -215,13 +215,13 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); + let context = 
Context::new(request_id.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); let request_id_2 = String::from("2"); - let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0, None); + let context = Context::new(request_id_2.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 2); assert_eq!(buffer.get(&request_id_2).unwrap(), &context); @@ -242,13 +242,13 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); let request_id_2 = String::from("2"); - let context = Context::new(request_id_2.clone(), 0.0, 0.0, 0, None); + let context = Context::new(request_id_2.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 2); assert_eq!(buffer.get(&request_id_2).unwrap(), &context); @@ -263,13 +263,13 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); - buffer.add_init_duration(&request_id, 100.0); - assert_eq!(buffer.get(&request_id).unwrap().init_duration_ms, 100.0); + buffer.add_init_duration(&request_id, 100f64); + assert!((buffer.get(&request_id).unwrap().init_duration_ms - 100f64).abs() < f64::EPSILON); } #[test] @@ -277,7 +277,7 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); + let 
context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); @@ -291,13 +291,15 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); - buffer.add_runtime_duration(&request_id, 100.0); - assert_eq!(buffer.get(&request_id).unwrap().runtime_duration_ms, 100.0); + buffer.add_runtime_duration(&request_id, 100f64); + assert!( + (buffer.get(&request_id).unwrap().runtime_duration_ms - 100f64).abs() < f64::EPSILON + ); } #[test] @@ -305,27 +307,27 @@ mod tests { let mut buffer = ContextBuffer::with_capacity(2); let request_id = String::from("1"); - let context = Context::new(request_id.clone(), 0.0, 0.0, 0, None); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); buffer.insert(context.clone()); assert_eq!(buffer.size(), 1); assert_eq!(buffer.get(&request_id).unwrap(), &context); let network_offset = Some(NetworkData { - rx_bytes: 180.0, + rx_bytes: 180f64, tx_bytes: 254.0, }); let mut individual_cpu_idle_times = HashMap::new(); - individual_cpu_idle_times.insert("cpu0".to_string(), 10.0); - individual_cpu_idle_times.insert("cpu1".to_string(), 20.0); + individual_cpu_idle_times.insert("cpu0".to_string(), 10f64); + individual_cpu_idle_times.insert("cpu1".to_string(), 20f64); let cpu_offset = Some(CPUData { - total_user_time_ms: 100.0, + total_user_time_ms: 100f64, total_system_time_ms: 53.0, - total_idle_time_ms: 20.0, + total_idle_time_ms: 20f64, individual_cpu_idle_times, }); - let uptime_offset = Some(50.0); + let uptime_offset = Some(50f64); let (tmp_chan_tx, _) = watch::channel(()); let (process_chan_tx, _) = watch::channel(()); 
diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs index 3391bc689..a916305f7 100644 --- a/bottlecap/src/lifecycle/invocation/span_inferrer.rs +++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs @@ -17,7 +17,7 @@ use crate::lifecycle::invocation::{ lambda_function_url_event::LambdaFunctionUrlEvent, s3_event::S3Record, sns_event::{SnsEntity, SnsRecord}, - sqs_event::SqsRecord, + sqs_event::{extract_trace_context_from_aws_trace_header, SqsRecord}, step_function_event::StepFunctionEvent, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, }, @@ -35,7 +35,7 @@ pub struct SpanInferrer { is_async_span: bool, // Carrier to extract the span context from carrier: Option>, - // Generated Span Context from Step Functions + // Generated Span Context from Step Functions or context taken from `AWSTraceHeader` when java->sqs->java generated_span_context: Option, // Tags generated from the trigger trigger_tags: Option>, @@ -74,6 +74,8 @@ impl SpanInferrer { ..Default::default() }; + let mut is_step_function = false; + if APIGatewayHttpEvent::is_match(payload_value) { if let Some(t) = APIGatewayHttpEvent::new(payload_value.clone()) { t.enrich_span(&mut inferred_span, &self.service_mapping); @@ -96,6 +98,10 @@ impl SpanInferrer { if let Some(t) = SqsRecord::new(payload_value.clone()) { t.enrich_span(&mut inferred_span, &self.service_mapping); + self.generated_span_context = extract_trace_context_from_aws_trace_header( + t.attributes.aws_trace_header.clone(), + ); + // Check for SNS event wrapped in the SQS body if let Ok(sns_entity) = serde_json::from_str::(&t.body) { debug!("Found an SNS event wrapped in the SQS body"); @@ -191,6 +197,7 @@ impl SpanInferrer { if let Some(t) = StepFunctionEvent::new(payload_value.clone()) { self.generated_span_context = Some(t.get_span_context()); trigger = Some(Box::new(t)); + is_step_function = true; } } else { debug!("Unable to infer span from payload: no matching trigger 
found"); @@ -209,7 +216,7 @@ impl SpanInferrer { self.is_async_span = t.is_async(); // For Step Functions, there is no inferred span - if self.generated_span_context.is_some() { + if is_step_function && self.generated_span_context.is_some() { self.inferred_span = None; } else { self.inferred_span = Some(inferred_span); @@ -295,8 +302,8 @@ impl SpanInferrer { /// pub fn get_span_context(&self, propagator: &impl Propagator) -> Option { // Step Functions `SpanContext` is deterministically generated - if let Some(sc) = &self.generated_span_context { - return Some(sc.clone()); + if self.generated_span_context.is_some() { + return self.generated_span_context.clone(); } if let Some(sc) = self.carrier.as_ref().and_then(|c| propagator.extract(c)) { diff --git a/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs b/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs index 14a2eaa32..18bdc734d 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs @@ -225,7 +225,7 @@ mod tests { ]), request_context: RequestContext { request_id: String::from("ec4d58f8-2b8b-4ceb-a1d5-2be7bff58505"), - time_epoch: 1637169449721, + time_epoch: 1_637_169_449_721, http: Http { method: String::from("GET"), path: String::from("/"), diff --git a/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs b/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs index d45dc1f50..43065cb0f 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs @@ -134,6 +134,7 @@ impl ServiceNameResolver for S3Record { } #[cfg(test)] +#[allow(clippy::unwrap_used)] mod tests { use super::*; use crate::lifecycle::invocation::triggers::test_utils::read_json_file; diff --git a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs index 
47091a9d6..091dcdf53 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs @@ -174,6 +174,7 @@ impl ServiceNameResolver for SnsRecord { } #[cfg(test)] +#[allow(clippy::unwrap_used)] mod tests { use datadog_trace_protobuf::pb::Span; diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs index c9766a736..a4bf0e44f 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -13,6 +13,7 @@ use crate::lifecycle::invocation::{ ServiceNameResolver, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, }, }; +use crate::traces::context::{Sampling, SpanContext}; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct SqsEvent { @@ -64,6 +65,8 @@ pub struct Attributes { pub sent_timestamp: String, #[serde(rename = "SenderId")] pub sender_id: String, + #[serde(rename = "AWSTraceHeader")] + pub aws_trace_header: Option, } impl Trigger for SqsRecord { @@ -163,12 +166,9 @@ impl Trigger for SqsRecord { } } - fn is_async(&self) -> bool { - true - } - fn get_carrier(&self) -> HashMap { let carrier = HashMap::new(); + if let Some(ma) = self.message_attributes.get(DATADOG_CARRIER_KEY) { if let Some(string_value) = &ma.string_value { return serde_json::from_str(string_value).unwrap_or_default(); @@ -190,6 +190,10 @@ impl Trigger for SqsRecord { // TODO: AWSTraceHeader carrier } + + fn is_async(&self) -> bool { + true + } } impl ServiceNameResolver for SqsRecord { @@ -206,7 +210,74 @@ impl ServiceNameResolver for SqsRecord { } } +// extractTraceContextfromAWSTraceHeader extracts trace context from the +// AWSTraceHeader directly. Unlike the other carriers in this file, it should +// not be passed to the tracer.Propagator, instead extracting context directly. 
+pub(crate) fn extract_trace_context_from_aws_trace_header( + headers_string: Option, +) -> Option { + let value = headers_string?; + if !value.starts_with("Root=") { + return None; + } + + let mut start_part = 0; + let mut trace_id = String::new(); + let mut parent_id = String::new(); + let mut sampled = String::new(); + + let length = value.len(); + while start_part < length { + let end_part = value[start_part..] + .find(';') + .map_or(length, |i| i + start_part); + let part = &value[start_part..end_part]; + + if part.starts_with("Root=") { + if trace_id.is_empty() { + trace_id = part[24..].to_string(); + } + } else if let Some(parent_part) = part.strip_prefix("Parent=") { + if parent_id.is_empty() { + parent_id = parent_part.to_string(); + } + } else if part.starts_with("Sampled=") && sampled.is_empty() { + sampled = part[8..].to_string(); + } + + if !trace_id.is_empty() && !parent_id.is_empty() && !sampled.is_empty() { + break; + } + start_part = end_part + 1; + } + + let trace_id = u64::from_str_radix(&trace_id, 16).ok()?; + let parent_id = u64::from_str_radix(&parent_id, 16).ok()?; + + if trace_id == 0 || parent_id == 0 { + debug!("awstrace_header contains empty trace or parent ID"); + return None; + } + + let sampling_priority = i8::from(sampled == "1"); + + Some(SpanContext { + // the context from AWS Header is used by Datadog only and does not contain the upper + // 64 bits like other 128 w3c compliant trace ids + trace_id, + span_id: parent_id, + sampling: Some(Sampling { + priority: Some(sampling_priority), + mechanism: None, + }), + origin: None, + tags: HashMap::new(), + links: Vec::new(), + }) +} + #[cfg(test)] +#[allow(clippy::unwrap_used)] mod tests { use super::*; use crate::lifecycle::invocation::triggers::test_utils::read_json_file; @@ -235,6 +306,7 @@ mod tests { approximate_receive_count: "1".to_string(), sent_timestamp: "1523232000000".to_string(), sender_id: "123456789012".to_string(), + aws_trace_header: None, }, message_attributes, 
md5_of_body: "{{{md5_of_body}}}".to_string(), @@ -425,4 +497,29 @@ mod tests { "generic-service" ); } + + #[test] + fn extract_java_sqs_header_context() { + let json = read_json_file("eventbridge_sqs_java_header_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize EventBridgeEvent"); + + assert_eq!( + extract_trace_context_from_aws_trace_header(Some( + event.attributes.aws_trace_header.unwrap().to_string() + )) + .unwrap(), + SpanContext { + trace_id: 130_944_522_478_755_159, + span_id: 9_032_698_535_745_367_362, + sampling: Some(Sampling { + priority: Some("0".parse().unwrap()), + mechanism: None, + }), + origin: None, + tags: HashMap::new(), + links: Vec::new(), + } + ); + } } diff --git a/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs b/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs index ee77434bc..6169be7d3 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs @@ -322,8 +322,8 @@ mod tests { let span_context = event.get_span_context(); let expected = SpanContext { - trace_id: 5744042798732701615, - span_id: 2902498116043018663, + trace_id: 5_744_042_798_732_701_615, + span_id: 2_902_498_116_043_018_663, sampling: Some(Sampling { priority: Some(1), mechanism: None, @@ -347,7 +347,7 @@ mod tests { String::from("2022-12-08T21:08:19.224Z") ); - assert_eq!(parent_id, 4340734536022949921); + assert_eq!(parent_id, 4_340_734_536_022_949_921); let parent_id = StepFunctionEvent::generate_parent_id( String::from("arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a:c8baf081-31f1-464d-971f-70cb17d01111"), @@ -355,7 +355,7 @@ mod tests { String::from("2022-12-08T21:08:19.224Y") ); - assert_eq!(parent_id, 981693280319792699); + assert_eq!(parent_id, 
981_693_280_319_792_699); } #[test] @@ -363,20 +363,20 @@ mod tests { let (lo_tid, hi_tid) = StepFunctionEvent::generate_trace_id(String::from( "arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j", )); - let hex_tid = format!("{:x}", hi_tid); + let hex_tid = format!("{hi_tid:x}"); - assert_eq!(lo_tid, 1680583253837593461); - assert_eq!(hi_tid, 6984552746569958392); + assert_eq!(lo_tid, 1_680_583_253_837_593_461); + assert_eq!(hi_tid, 6_984_552_746_569_958_392); assert_eq!(hex_tid, "60ee1db79e4803f8"); let (lo_tid, hi_tid) = StepFunctionEvent::generate_trace_id( String::from("arn:aws:states:us-east-1:425362996713:execution:agocsTestSF:bc9f281c-3daa-4e5a-9a60-471a3810bf44") ); - let hex_tid = format!("{:x}", hi_tid); + let hex_tid = format!("{hi_tid:x}"); - assert_eq!(lo_tid, 5744042798732701615); - assert_eq!(hi_tid, 1807349139850867390); + assert_eq!(lo_tid, 5_744_042_798_732_701_615); + assert_eq!(hi_tid, 1_807_349_139_850_867_390); assert_eq!(hex_tid, "1914fe7789eb32be"); } diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index af5996ff6..f307b979b 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -743,6 +743,7 @@ mod tests { } #[test] + #[allow(clippy::too_many_lines)] fn test_disabled() { let (metrics_aggr, no_config) = setup(); let my_config = Arc::new(config::Config { @@ -991,8 +992,8 @@ mod tests { let (metrics_aggr, my_config) = setup(); let lambda = Lambda::new(metrics_aggr.clone(), my_config); - let tmp_max = 550461440.0; - let tmp_used = 12165120.0; + let tmp_max = 550_461_440.0; + let tmp_used = 12_165_120.0; Lambda::generate_tmp_enhanced_metrics( tmp_max, @@ -1001,9 +1002,9 @@ mod tests { None, ); - assert_sketch(&metrics_aggr, constants::TMP_MAX_METRIC, 550461440.0); - assert_sketch(&metrics_aggr, constants::TMP_USED_METRIC, 12165120.0); - assert_sketch(&metrics_aggr, constants::TMP_FREE_METRIC, 538296320.0); + 
assert_sketch(&metrics_aggr, constants::TMP_MAX_METRIC, 550_461_440.0); + assert_sketch(&metrics_aggr, constants::TMP_USED_METRIC, 12_165_120.0); + assert_sketch(&metrics_aggr, constants::TMP_FREE_METRIC, 538_296_320.0); } #[test] diff --git a/bottlecap/src/proc/mod.rs b/bottlecap/src/proc/mod.rs index 2aaf17474..f6fa819c2 100644 --- a/bottlecap/src/proc/mod.rs +++ b/bottlecap/src/proc/mod.rs @@ -330,8 +330,6 @@ mod tests { use super::*; use std::path::PathBuf; - const PRECISION: f64 = 1e-6; - fn path_from_root(file: &str) -> String { let mut safe_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); safe_path.push(file); @@ -341,8 +339,8 @@ mod tests { #[test] fn test_get_pid_list() { let path = "./tests/proc"; - let mut pids = get_pid_list_from_path(path); - pids.sort(); + let mut pids = get_pid_list_from_path(path_from_root(path).as_str()); + pids.sort_unstable(); assert_eq!(pids.len(), 2); assert_eq!(pids[0], 13); assert_eq!(pids[1], 142); @@ -358,8 +356,8 @@ mod tests { let network_data_result = get_network_data_from_path(path_from_root(path).as_str()); assert!(network_data_result.is_ok()); let network_data = network_data_result.unwrap(); - assert!((network_data.rx_bytes - 180.0).abs() < PRECISION); - assert!((network_data.tx_bytes - 254.0).abs() < PRECISION); + assert!((network_data.rx_bytes - 180.0).abs() < f64::EPSILON); + assert!((network_data.tx_bytes - 254.0).abs() < f64::EPSILON); let path = "./tests/proc/net/invalid_dev_malformed"; let network_data_result = get_network_data_from_path(path); @@ -384,9 +382,9 @@ mod tests { let cpu_data_result = get_cpu_data_from_path(path_from_root(path).as_str()); assert!(cpu_data_result.is_ok()); let cpu_data = cpu_data_result.unwrap(); - assert!((cpu_data.total_user_time_ms - 23370.0).abs() < PRECISION); - assert!((cpu_data.total_system_time_ms - 1880.0).abs() < PRECISION); - assert!((cpu_data.total_idle_time_ms - 178_380.0).abs() < PRECISION); + assert!((cpu_data.total_user_time_ms - 23370.0).abs() < f64::EPSILON); + 
assert!((cpu_data.total_system_time_ms - 1880.0).abs() < f64::EPSILON); + assert!((cpu_data.total_idle_time_ms - 178_380.0).abs() < f64::EPSILON); assert_eq!(cpu_data.individual_cpu_idle_times.len(), 2); assert!( (*cpu_data @@ -395,7 +393,7 @@ mod tests { .expect("cpu0 not found") - 91880.0) .abs() - < PRECISION + < f64::EPSILON ); assert!( (*cpu_data @@ -404,7 +402,7 @@ mod tests { .expect("cpu1 not found") - 86490.0) .abs() - < PRECISION + < f64::EPSILON ); let path = "./tests/proc/stat/invalid_stat_non_numerical_value_1"; @@ -438,7 +436,7 @@ mod tests { let uptime_data_result = get_uptime_from_path(path_from_root(path).as_str()); assert!(uptime_data_result.is_ok()); let uptime_data = uptime_data_result.unwrap(); - assert!((uptime_data - 3_213_103_123_000.0).abs() < PRECISION); + assert!((uptime_data - 3_213_103_123_000.0).abs() < f64::EPSILON); let path = "./tests/proc/uptime/invalid_data_uptime"; let uptime_data_result = get_uptime_from_path(path); @@ -456,29 +454,29 @@ mod tests { #[test] fn test_get_fd_max_data() { let path = "./tests/proc/process/valid"; - let pids = get_pid_list_from_path(path); + let pids = get_pid_list_from_path(path_from_root(path).as_str()); let fd_max = get_fd_max_data_from_path(path, &pids); - assert!((fd_max - 900.0).abs() < PRECISION); + assert!((fd_max - 900.0).abs() < f64::EPSILON); let path = "./tests/proc/process/invalid_malformed"; let fd_max = get_fd_max_data_from_path(path, &pids); // assert that fd_max is equal to AWS Lambda limit - assert!((fd_max - constants::LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT).abs() < PRECISION); + assert!((fd_max - constants::LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT).abs() < f64::EPSILON); let path = "./tests/proc/process/invalid_missing"; let fd_max = get_fd_max_data_from_path(path, &pids); // assert that fd_max is equal to AWS Lambda limit - assert!((fd_max - constants::LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT).abs() < PRECISION); + assert!((fd_max - constants::LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT).abs() 
< f64::EPSILON); } #[test] fn test_get_fd_use_data() { let path = "./tests/proc/process/valid"; - let pids = get_pid_list_from_path(path); + let pids = get_pid_list_from_path(path_from_root(path).as_str()); let fd_use_result = get_fd_use_data_from_path(path, &pids); assert!(fd_use_result.is_ok()); let fd_use = fd_use_result.unwrap(); - assert!((fd_use - 5.0).abs() < PRECISION); + assert!((fd_use - 5.0).abs() < f64::EPSILON); let path = "./tests/proc/process/invalid_missing"; let fd_use_result = get_fd_use_data_from_path(path, &pids); @@ -488,33 +486,35 @@ mod tests { #[test] fn test_get_threads_max_data() { let path = "./tests/proc/process/valid"; - let pids = get_pid_list_from_path(path); + let pids = get_pid_list_from_path(path_from_root(path).as_str()); let threads_max = get_threads_max_data_from_path(path, &pids); - assert!((threads_max - 1024.0).abs() < PRECISION); + assert!((threads_max - 1024.0).abs() < f64::EPSILON); let path = "./tests/proc/process/invalid_malformed"; let threads_max = get_threads_max_data_from_path(path, &pids); // assert that threads_max is equal to AWS Lambda limit assert!( - (threads_max - constants::LAMBDA_EXECUTION_PROCESSES_DEFAULT_LIMIT).abs() < PRECISION + (threads_max - constants::LAMBDA_EXECUTION_PROCESSES_DEFAULT_LIMIT).abs() + < f64::EPSILON ); let path = "./tests/proc/process/invalid_missing"; let threads_max = get_threads_max_data_from_path(path, &pids); // assert that threads_max is equal to AWS Lambda limit assert!( - (threads_max - constants::LAMBDA_EXECUTION_PROCESSES_DEFAULT_LIMIT).abs() < PRECISION + (threads_max - constants::LAMBDA_EXECUTION_PROCESSES_DEFAULT_LIMIT).abs() + < f64::EPSILON ); } #[test] fn test_get_threads_use_data() { let path = "./tests/proc/process/valid"; - let pids = get_pid_list_from_path(path); + let pids = get_pid_list_from_path(path_from_root(path).as_str()); let threads_use_result = get_threads_use_data_from_path(path, &pids); assert!(threads_use_result.is_ok()); let threads_use = 
threads_use_result.unwrap(); - assert!((threads_use - 5.0).abs() < PRECISION); + assert!((threads_use - 5.0).abs() < f64::EPSILON); let path = "./tests/proc/process/invalid_missing"; let threads_use_result = get_threads_use_data_from_path(path, &pids); diff --git a/bottlecap/src/traces/propagation/mod.rs b/bottlecap/src/traces/propagation/mod.rs index 723666406..300e6eff9 100644 --- a/bottlecap/src/traces/propagation/mod.rs +++ b/bottlecap/src/traces/propagation/mod.rs @@ -211,9 +211,13 @@ pub mod tests { use super::*; + fn lower_64_bits(value: u128) -> u64 { + (value & 0xFFFF_FFFF_FFFF_FFFF) as u64 + } + lazy_static! { static ref TRACE_ID: u128 = 171_395_628_812_617_415_352_188_477_958_425_669_623; - static ref TRACE_ID_LOWER_ORDER_BITS: u64 = *TRACE_ID as u64; + static ref TRACE_ID_LOWER_ORDER_BITS: u64 = lower_64_bits(*TRACE_ID); static ref TRACE_ID_HEX: String = String::from("80f198ee56343ba864fe8b2a57d3eff7"); // TraceContext Headers @@ -771,13 +775,15 @@ pub mod tests { #[test] fn test_new_filter_propagators() { - let mut config = config::Config::default(); - config.trace_propagation_style_extract = vec![ - TracePropagationStyle::Datadog, - TracePropagationStyle::TraceContext, - TracePropagationStyle::B3, - TracePropagationStyle::B3Multi, - ]; + let config = config::Config { + trace_propagation_style_extract: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + TracePropagationStyle::B3, + TracePropagationStyle::B3Multi, + ], + ..Default::default() + }; let propagator = DatadogCompositePropagator::new(Arc::new(config)); @@ -786,8 +792,10 @@ pub mod tests { #[test] fn test_new_no_propagators() { - let mut config = config::Config::default(); - config.trace_propagation_style_extract = vec![TracePropagationStyle::None]; + let config = config::Config { + trace_propagation_style_extract: vec![TracePropagationStyle::None], + ..Default::default() + }; let propagator = DatadogCompositePropagator::new(Arc::new(config)); 
assert_eq!(propagator.propagators.len(), 0); @@ -795,11 +803,13 @@ pub mod tests { #[test] fn test_extract_available_contexts() { - let mut config = config::Config::default(); - config.trace_propagation_style_extract = vec![ - TracePropagationStyle::Datadog, - TracePropagationStyle::TraceContext, - ]; + let config = config::Config { + trace_propagation_style_extract: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ], + ..Default::default() + }; let propagator = DatadogCompositePropagator::new(Arc::new(config)); @@ -835,8 +845,10 @@ pub mod tests { #[test] fn test_extract_available_contexts_no_contexts() { - let mut config = config::Config::default(); - config.trace_propagation_style_extract = vec![TracePropagationStyle::Datadog]; + let config = config::Config { + trace_propagation_style_extract: vec![TracePropagationStyle::Datadog], + ..Default::default() + }; let propagator = DatadogCompositePropagator::new(Arc::new(config)); @@ -868,6 +880,6 @@ pub mod tests { DatadogCompositePropagator::attach_baggage(&mut context, &carrier); assert_eq!(context.tags.len(), 1); - assert_eq!(context.tags.get("key1").unwrap(), "value1"); + assert_eq!(context.tags.get("key1").expect("Missing tag"), "value1"); } } diff --git a/bottlecap/src/traces/propagation/text_map_propagator.rs b/bottlecap/src/traces/propagation/text_map_propagator.rs index f1c5cbcf4..520428630 100644 --- a/bottlecap/src/traces/propagation/text_map_propagator.rs +++ b/bottlecap/src/traces/propagation/text_map_propagator.rs @@ -456,6 +456,7 @@ impl TraceContextPropagator { } #[cfg(test)] +#[allow(clippy::unwrap_used)] mod test { use super::*; diff --git a/bottlecap/tests/payloads/eventbridge_sqs_event.json b/bottlecap/tests/payloads/eventbridge_sqs_event.json index 033740244..b3a392a50 100644 --- a/bottlecap/tests/payloads/eventbridge_sqs_event.json +++ b/bottlecap/tests/payloads/eventbridge_sqs_event.json @@ -6,7 +6,6 @@ "body": 
"{\"version\":\"0\",\"id\":\"af718b2a-b987-e8c0-7a2b-a188fad2661a\",\"detail-type\":\"my.Detail\",\"source\":\"my.Source\",\"account\":\"425362996713\",\"time\":\"2023-08-03T22:49:03Z\",\"region\":\"us-east-1\",\"resources\":[],\"detail\":{\"text\":\"Hello, world!\",\"_datadog\":{\"x-datadog-trace-id\":\"7379586022458917877\",\"x-datadog-parent-id\":\"2644033662113726488\",\"x-datadog-sampling-priority\":\"1\",\"x-datadog-tags\":\"_dd.p.dm=-0\",\"traceparent\":\"00-000000000000000066698e63821a03f5-24b17e9b6476c018-01\",\"tracestate\":\"dd=t.dm:-0;s:1\"}}}", "attributes": { "ApproximateReceiveCount": "1", - "AWSTraceHeader": "Root=1-64cc2edd-112fbf1701d1355973a11d57;Parent=7d5a9776024b2d42;Sampled=0", "SentTimestamp": "1691102943638", "SenderId": "AIDAJXNJGGKNS7OSV23OI", "ApproximateFirstReceiveTimestamp": "1691102943647" diff --git a/bottlecap/tests/payloads/eventbridge_sqs_java_header_event.json b/bottlecap/tests/payloads/eventbridge_sqs_java_header_event.json new file mode 100644 index 000000000..033740244 --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_sqs_java_header_event.json @@ -0,0 +1,21 @@ +{ + "Records": [ + { + "messageId": "e995e54f-1724-41fa-82c0-8b81821f854e", + "receiptHandle": "AQEB4mIfRcyqtzn1X5Ss+ConhTejVGc+qnAcmu3/Z9ZvbNkaPcpuDLX/bzvPD/ZkAXJUXZcemGSJmd7L3snZHKMP2Ck8runZiyl4mubiLb444pZvdiNPuGRJ6a3FvgS/GQPzho/9nNMyOi66m8Viwh70v4EUCPGO4JmD3TTDAUrrcAnqU4WSObjfC/NAp9bI6wH2CEyAYEfex6Nxplbl/jBf9ZUG0I3m3vQd0Q4l4gd4jIR4oxQUglU2Tldl4Kx5fMUAhTRLAENri6HsY81avBkKd9FAuxONlsITB5uj02kOkvLlRGEcalqsKyPJ7AFaDLrOLaL3U+yReroPEJ5R5nwhLOEbeN5HROlZRXeaAwZOIN8BjqdeooYTIOrtvMEVb7a6OPLMdH1XB+ddevtKAH8K9Tm2ZjpaA7dtBGh1zFVHzBk=", + "body": "{\"version\":\"0\",\"id\":\"af718b2a-b987-e8c0-7a2b-a188fad2661a\",\"detail-type\":\"my.Detail\",\"source\":\"my.Source\",\"account\":\"425362996713\",\"time\":\"2023-08-03T22:49:03Z\",\"region\":\"us-east-1\",\"resources\":[],\"detail\":{\"text\":\"Hello, 
world!\",\"_datadog\":{\"x-datadog-trace-id\":\"7379586022458917877\",\"x-datadog-parent-id\":\"2644033662113726488\",\"x-datadog-sampling-priority\":\"1\",\"x-datadog-tags\":\"_dd.p.dm=-0\",\"traceparent\":\"00-000000000000000066698e63821a03f5-24b17e9b6476c018-01\",\"tracestate\":\"dd=t.dm:-0;s:1\"}}}", + "attributes": { + "ApproximateReceiveCount": "1", + "AWSTraceHeader": "Root=1-64cc2edd-112fbf1701d1355973a11d57;Parent=7d5a9776024b2d42;Sampled=0", + "SentTimestamp": "1691102943638", + "SenderId": "AIDAJXNJGGKNS7OSV23OI", + "ApproximateFirstReceiveTimestamp": "1691102943647" + }, + "messageAttributes": {}, + "md5OfBody": "93d9f0cd8886d1e000a1a0b7007bffc4", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:us-east-1:425362996713:lambda-eb-sqs-lambda-dev-demo-queue", + "awsRegion": "us-east-1" + } + ] +} From ab907101d12233d60512cfc62176e50e6a97447e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Fri, 15 Nov 2024 16:59:38 -0500 Subject: [PATCH 38/41] increase layer size check again... 
(#458) --- .gitlab/scripts/check_layer_size.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/scripts/check_layer_size.sh b/.gitlab/scripts/check_layer_size.sh index c53615cb6..42095354b 100755 --- a/.gitlab/scripts/check_layer_size.sh +++ b/.gitlab/scripts/check_layer_size.sh @@ -15,7 +15,7 @@ if [ -z "$LAYER_FILE" ]; then fi MAX_LAYER_COMPRESSED_SIZE_KB=$(expr 20 \* 1024) # 20 MB, amd64 is 19, while arm64 is 18 -MAX_LAYER_UNCOMPRESSED_SIZE_KB=$(expr 50 \* 1024) # 50 MB, amd is 50, while arm64 is 47 +MAX_LAYER_UNCOMPRESSED_SIZE_KB=$(expr 51 \* 1024) # 50 MB, amd is 50.5, while arm64 is 47 LAYERS_DIR=".layers" From 49e400976e0e40b258b75b8cab5928463168733f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:08:37 -0500 Subject: [PATCH 39/41] return early on `tmp`, and `process` metrics (#459) --- bottlecap/src/metrics/enhanced/lambda.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index f307b979b..2129853a2 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -440,7 +440,12 @@ impl Lambda { } } + #[allow(unreachable_code)] + #[allow(unused_variables)] + #[allow(unused_mut)] pub fn set_tmp_enhanced_metrics(&self, mut send_metrics: Receiver<()>) { + // Temporarily disabled + return; if !self.config.enhanced_metrics { return; } @@ -543,7 +548,12 @@ impl Lambda { } } + #[allow(unreachable_code)] + #[allow(unused_variables)] + #[allow(unused_mut)] pub fn set_process_enhanced_metrics(&self, mut send_metrics: Receiver<()>) { + // Temporarily disabled + return; if !self.config.enhanced_metrics { return; } From 00e1a22d8a7054a6483566ba8d6a60f8b63f64fa Mon Sep 17 00:00:00 2001 From: alexgallotta <5581237+alexgallotta@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:33:06 -0500 Subject: [PATCH 40/41] remove 
useless debug log (#460) --- bottlecap/src/traces/trace_agent.rs | 1 - bottlecap/src/traces/trace_processor.rs | 2 -- 2 files changed, 3 deletions(-) diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 57e6fd4c6..c617def85 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -219,7 +219,6 @@ impl TraceAgent { tags_provider: Arc, version: ApiVersion, ) -> http::Result> { - debug!("Received traces to process"); let (parts, body) = req.into_parts(); if let Some(response) = http_utils::verify_request_content_length( diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 5974c7d17..123c6a066 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -11,7 +11,6 @@ use ddcommon::Endpoint; use std::str::FromStr; use std::sync::Arc; -use tracing::debug; use crate::config; use crate::traces::{ @@ -122,7 +121,6 @@ impl TraceProcessor for ServerlessTraceProcessor { traces: Vec>, body_size: usize, ) -> SendData { - debug!("Received traces to process"); let payload = trace_utils::collect_trace_chunks( V07(traces), &header_tags, From db05ac448a781a1b058144ab2530d065313136a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?jordan=20gonz=C3=A1lez?= <30836115+duncanista@users.noreply.github.com> Date: Tue, 19 Nov 2024 15:05:16 -0500 Subject: [PATCH 41/41] fmt --- bottlecap/src/traces/trace_processor.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 123c6a066..f060dad43 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -11,7 +11,6 @@ use ddcommon::Endpoint; use std::str::FromStr; use std::sync::Arc; - use crate::config; use crate::traces::{ AWS_XRAY_DAEMON_ADDRESS_URL_PREFIX, DNS_LOCAL_HOST_ADDRESS_URL_PREFIX,