diff --git a/Cargo.lock b/Cargo.lock index f16b7a3594..32454cdc83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2867,6 +2867,7 @@ dependencies = [ "bytes", "cc", "const_format", + "criterion", "futures", "futures-core", "futures-util", diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 51ccb8e07d..0c1a897df2 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -95,6 +95,7 @@ core-foundation-sys,https://github.com/servo/core-foundation-rs,MIT OR Apache-2. cpp_demangle,https://github.com/gimli-rs/cpp_demangle,MIT OR Apache-2.0,"Nick Fitzgerald , Jim Blandy , Kyle Huey " cpufeatures,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers crc32fast,https://github.com/srijs/rust-crc32fast,MIT OR Apache-2.0,"Sam Rijs , Alex Crichton " +criterion,https://github.com/bheisler/criterion.rs,Apache-2.0 OR MIT,"Jorge Aparicio , Brook Heisler " criterion-plot,https://github.com/bheisler/criterion.rs,MIT OR Apache-2.0,"Jorge Aparicio , Brook Heisler " critical-section,https://github.com/rust-embedded/critical-section,MIT OR Apache-2.0,The critical-section Authors crossbeam-channel,https://github.com/crossbeam-rs/crossbeam,MIT OR Apache-2.0,The crossbeam-channel Authors diff --git a/datadog-ipc/benches/ipc.rs b/datadog-ipc/benches/ipc.rs index 929906fd89..bd80577927 100644 --- a/datadog-ipc/benches/ipc.rs +++ b/datadog-ipc/benches/ipc.rs @@ -15,7 +15,7 @@ use tokio::runtime; fn criterion_benchmark(c: &mut Criterion) { let (conn_server, conn_client) = datadog_ipc::SeqpacketConn::socketpair().unwrap(); - let worker = thread::spawn(move || { + let _worker = thread::spawn(move || { let rt = runtime::Builder::new_current_thread() .enable_all() .build() @@ -36,15 +36,6 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("two way interface", |b| { b.iter(|| channel.call_req_cnt().unwrap()) }); - - #[cfg(not(target_arch = "aarch64"))] - println!( - "Total requests handled: {}", - channel.call_req_cnt().unwrap() - ); - - drop(channel); - worker.join().unwrap(); } #[cfg(unix)] diff --git a/libdd-common/Cargo.toml b/libdd-common/Cargo.toml index f9d4f1ac19..7419853b0a 100644 --- a/libdd-common/Cargo.toml +++ b/libdd-common/Cargo.toml @@ -35,6 +35,7 @@ regex = "1.5" # Use rustls-no-provider instead of rustls to avoid reqwest forcing aws-lc-rs as the crypto # backend. We install the ring provider explicitly in connector/mod.rs instead. reqwest = { version = "0.13.2", features = ["rustls-no-provider", "hickory-dns"], default-features = false, optional = true } +criterion = { version = "0.5.1", optional = true } # Pinned to <0.8.3: version 0.8.3+ pulls in openssl-probe@0.2 which probes multiple # certificate directories and parses individual cert files instead of loading a single # bundle, adding unnecessary I/O overhead in latency-sensitive environments. @@ -103,6 +104,8 @@ fips = ["tls-core", "hyper-rustls/fips"] reqwest = ["dep:reqwest", "test-utils"] # Enable test utilities for use in other crates test-utils = ["dep:httparse", "dep:rand", "dep:mime", "dep:multer"] +# Enable benchmark utilities (ReportingAllocator, Criterion allocation measurement) +bench-utils = ["dep:criterion"] [lints.rust] # We run coverage checks in our github actions. These checks are run with diff --git a/libdd-common/src/bench_utils.rs b/libdd-common/src/bench_utils.rs new file mode 100644 index 0000000000..17337f6cd9 --- /dev/null +++ b/libdd-common/src/bench_utils.rs @@ -0,0 +1,333 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Scaffolding for memory usage benchmarks. +//! +//! See the `ReportingAllocator` type and `memory_allocated_measurement` for usage. + +#![allow(missing_docs)] + +use std::{ + alloc::{GlobalAlloc, System}, + cell::Cell, + time::Duration, +}; + +use criterion::{Criterion, Throughput}; + +pub trait MeasurementName { + fn name() -> &'static str; +} + +impl MeasurementName for criterion::measurement::WallTime { + fn name() -> &'static str { + "wall_time" + } +} + +pub fn memory_allocated_measurement( + global_alloc: &'static ReportingAllocator, +) -> Criterion> { + Criterion::default() + .with_measurement(AllocatedBytesMeasurement(Cell::new(false), global_alloc)) + .measurement_time(Duration::from_millis(1)) + .warm_up_time(Duration::from_millis(1)) + .without_plots() + .plotting_backend(criterion::PlottingBackend::None) + .sample_size(10) +} + +#[derive(Debug)] +struct AllocStats { + allocated_bytes: usize, + #[allow(dead_code)] + allocations: usize, +} + +pub struct ReportingAllocator { + alloc: T, + allocated_bytes: std::sync::atomic::AtomicUsize, + allocations: std::sync::atomic::AtomicUsize, +} + +impl ReportingAllocator { + pub const fn new(alloc: T) -> Self { + Self { + alloc, + allocated_bytes: std::sync::atomic::AtomicUsize::new(0), + allocations: std::sync::atomic::AtomicUsize::new(0), + } + } + + fn stats(&self) -> AllocStats { + AllocStats { + allocated_bytes: self + .allocated_bytes + .load(std::sync::atomic::Ordering::Relaxed), + allocations: self.allocations.load(std::sync::atomic::Ordering::Relaxed), + } + } +} + +unsafe impl GlobalAlloc for ReportingAllocator { + unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 { + self.allocated_bytes + .fetch_add(layout.size(), std::sync::atomic::Ordering::Relaxed); + self.allocations + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + self.alloc.alloc(layout) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) { + self.alloc.dealloc(ptr, layout); + } +} + +pub struct AllocatedBytesMeasurement( + Cell, + &'static ReportingAllocator, +); + +impl MeasurementName for AllocatedBytesMeasurement { + fn name() -> &'static str { + "allocated_bytes" + } +} + +impl criterion::measurement::Measurement for AllocatedBytesMeasurement { + type Intermediate = usize; + + type Value = usize; + + fn start(&self) -> Self::Intermediate { + self.1.stats().allocated_bytes + } + + fn end(&self, i: Self::Intermediate) -> Self::Value { + self.1.stats().allocated_bytes - i + } + + fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value { + *v1 + *v2 + } + + fn zero(&self) -> Self::Value { + 0 + } + + fn to_f64(&self, value: &Self::Value) -> f64 { + let b = self.0.get(); + self.0.set(!b); + // Criterion does not handle all-identical measurement values well, and since + // allocation is deterministic that tends to happen a lot. Add a small +/- epsilon + // so each pair of measurements differs slightly without skewing the distribution. + *value as f64 + if b { 0.01 } else { -0.01 } + } + + fn formatter(&self) -> &dyn criterion::measurement::ValueFormatter { + &AllocationFormatter + } +} + +struct AllocationFormatter; + +impl criterion::measurement::ValueFormatter for AllocationFormatter { + fn scale_values(&self, typical_value: f64, values: &mut [f64]) -> &'static str { + let log_scale: f64 = typical_value.log10().round(); + if log_scale.is_infinite() || log_scale.is_nan() || log_scale < 0.0 { + return "b"; + } + let scale = (log_scale as i32 / 3).min(4); + values.iter_mut().for_each(|v| *v /= 10_f64.powi(scale * 3)); + match scale { + 0 => "b", + 1 => "Kb", + 2 => "Mb", + 3 => "Gb", + _ => "Tb", + } + } + + fn scale_throughputs( + &self, + _typical_value: f64, + throughput: &criterion::Throughput, + _values: &mut [f64], + ) -> &'static str { + match throughput { + Throughput::Bytes(_) => "B/s", + Throughput::BytesDecimal(_) => "B/s", + Throughput::Elements(_) => "elements/s", + } + } + + fn scale_for_machines(&self, _values: &mut [f64]) -> &'static str { + "b" + } +} + +#[cfg(test)] +mod tests { + use super::*; + use criterion::measurement::{Measurement, ValueFormatter}; + use std::alloc::{GlobalAlloc, Layout, System}; + + static SHARED: ReportingAllocator = ReportingAllocator::new(System); + + // --- ReportingAllocator --- + + #[test] + fn new_starts_at_zero() { + let a = ReportingAllocator::new(System); + let s = a.stats(); + assert_eq!(s.allocated_bytes, 0); + assert_eq!(s.allocations, 0); + } + + #[test] + fn alloc_increments_both_counters() { + let a = ReportingAllocator::new(System); + let layout = Layout::from_size_align(64, 8).unwrap(); + let ptr = unsafe { a.alloc(layout) }; + assert!(!ptr.is_null()); + assert_eq!(a.stats().allocated_bytes, 64); + assert_eq!(a.stats().allocations, 1); + unsafe { a.dealloc(ptr, layout) }; + } + + #[test] + fn dealloc_does_not_change_counters() { + let a = ReportingAllocator::new(System); + let layout = Layout::from_size_align(32, 8).unwrap(); + let ptr = unsafe { a.alloc(layout) }; + let bytes_after_alloc = a.stats().allocated_bytes; + unsafe { a.dealloc(ptr, layout) }; + assert_eq!(a.stats().allocated_bytes, bytes_after_alloc); + assert_eq!(a.stats().allocations, 1); + } + + #[test] + fn multiple_allocs_accumulate() { + let a = ReportingAllocator::new(System); + let l1 = Layout::from_size_align(16, 8).unwrap(); + let l2 = Layout::from_size_align(32, 8).unwrap(); + let p1 = unsafe { a.alloc(l1) }; + let p2 = unsafe { a.alloc(l2) }; + assert_eq!(a.stats().allocated_bytes, 48); + assert_eq!(a.stats().allocations, 2); + unsafe { + a.dealloc(p1, l1); + a.dealloc(p2, l2); + } + } + + // --- AllocatedBytesMeasurement --- + + #[test] + fn measurement_zero_and_add() { + let m = AllocatedBytesMeasurement(Cell::new(false), &SHARED); + assert_eq!(m.zero(), 0); + assert_eq!(m.add(&100, &200), 300); + } + + #[test] + fn measurement_start_end_tracks_delta() { + let m = AllocatedBytesMeasurement(Cell::new(false), &SHARED); + let start = m.start(); + let layout = Layout::from_size_align(256, 8).unwrap(); + let ptr = unsafe { SHARED.alloc(layout) }; + // Other tests may also allocate via SHARED concurrently, so allow >= 256. + assert!(m.end(start) >= 256); + unsafe { SHARED.dealloc(ptr, layout) }; + } + + #[test] + fn measurement_to_f64_alternates_epsilon() { + let m = AllocatedBytesMeasurement(Cell::new(false), &SHARED); + // Initial state: Cell = false → first result is value - 0.01 + assert!((m.to_f64(&1000) - 999.99).abs() < 1e-9); + // After first call: Cell = true → result is value + 0.01 + assert!((m.to_f64(&1000) - 1000.01).abs() < 1e-9); + // Alternates back + assert!((m.to_f64(&1000) - 999.99).abs() < 1e-9); + } + + #[test] + fn measurement_name() { + assert_eq!( + AllocatedBytesMeasurement::::name(), + "allocated_bytes" + ); + } + + // --- AllocationFormatter::scale_values --- + + #[test] + fn scale_values_zero_returns_bytes() { + let f = AllocationFormatter; + let mut v = [42.0_f64]; + assert_eq!(f.scale_values(0.0, &mut v), "b"); + } + + #[test] + fn scale_values_sub_byte_returns_bytes() { + let f = AllocationFormatter; + let mut v = [0.5_f64]; + // log10(0.1) = -1 → negative → "b" + assert_eq!(f.scale_values(0.1, &mut v), "b"); + } + + #[test] + fn scale_values_bytes() { + let f = AllocationFormatter; + let mut v = [1.0_f64]; + assert_eq!(f.scale_values(1.0, &mut v), "b"); + assert!((v[0] - 1.0).abs() < 1e-9); + } + + #[test] + fn scale_values_kilobytes() { + let f = AllocationFormatter; + let mut v = [2000.0_f64]; + assert_eq!(f.scale_values(1000.0, &mut v), "Kb"); + assert!((v[0] - 2.0).abs() < 1e-9); + } + + #[test] + fn scale_values_megabytes() { + let f = AllocationFormatter; + let mut v = [3_000_000.0_f64]; + assert_eq!(f.scale_values(1_000_000.0, &mut v), "Mb"); + assert!((v[0] - 3.0).abs() < 1e-9); + } + + #[test] + fn scale_values_gigabytes() { + let f = AllocationFormatter; + let mut v = [4_000_000_000.0_f64]; + assert_eq!(f.scale_values(1_000_000_000.0, &mut v), "Gb"); + assert!((v[0] - 4.0).abs() < 1e-9); + } + + #[test] + fn scale_values_terabytes() { + let f = AllocationFormatter; + let mut v = [5_000_000_000_000.0_f64]; + assert_eq!(f.scale_values(1_000_000_000_000.0, &mut v), "Tb"); + assert!((v[0] - 5.0).abs() < 1e-9); + } + + #[test] + fn scale_values_very_large_clamps_to_terabytes() { + let f = AllocationFormatter; + let mut v = [1e18_f64]; + assert_eq!(f.scale_values(1e18, &mut v), "Tb"); + } + + #[test] + fn scale_for_machines_returns_bytes_unit() { + let f = AllocationFormatter; + let mut v = [1000.0_f64]; + assert_eq!(f.scale_for_machines(&mut v), "b"); + } +} diff --git a/libdd-common/src/lib.rs b/libdd-common/src/lib.rs index 1922446c6c..2578459260 100644 --- a/libdd-common/src/lib.rs +++ b/libdd-common/src/lib.rs @@ -23,6 +23,8 @@ pub mod dump_server; pub mod entity_id; #[macro_use] pub mod cstr; +#[cfg(feature = "bench-utils")] +pub mod bench_utils; pub mod config; pub mod error; pub mod http_common; diff --git a/libdd-trace-utils/Cargo.toml b/libdd-trace-utils/Cargo.toml index 9e6b3655fe..bfacc33037 100644 --- a/libdd-trace-utils/Cargo.toml +++ b/libdd-trace-utils/Cargo.toml @@ -66,6 +66,7 @@ getrandom = { version = "0.2", features = ["js"] } [dev-dependencies] libdd-capabilities-impl = { version = "0.1.0", path = "../libdd-capabilities-impl" } +libdd-common = { path = "../libdd-common", default-features = false, features = ["bench-utils"] } bolero = "0.13" criterion = "0.5.1" httpmock = { version = "0.8.0-alpha.1" } diff --git a/libdd-trace-utils/benches/deserialization.rs b/libdd-trace-utils/benches/deserialization.rs index c073446f37..767082e5be 100644 --- a/libdd-trace-utils/benches/deserialization.rs +++ b/libdd-trace-utils/benches/deserialization.rs @@ -1,7 +1,10 @@ // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use std::alloc::System; + use criterion::{black_box, criterion_group, Criterion}; +use libdd_common::bench_utils::{memory_allocated_measurement, AllocatedBytesMeasurement}; use libdd_trace_utils::tracer_payload::{decode_to_trace_chunks, TraceEncoding}; use serde_json::{json, Value}; @@ -81,4 +84,31 @@ pub fn deserialize_msgpack_to_internal(c: &mut Criterion) { ); } +fn deserialize_msgpack_to_internal_allocs(c: &mut Criterion>) { + let data = rmp_serde::to_vec(&generate_trace_chunks(20, 2_075)) + .expect("Failed to serialize test spans."); + let data_as_bytes = libdd_tinybytes::Bytes::copy_from_slice(&data); + + c.bench_function( + "benching deserializing traces from msgpack to their internal representation (allocs)", + |b| { + b.iter_batched( + || data_as_bytes.clone(), + |data_as_bytes| { + let result = + black_box(decode_to_trace_chunks(data_as_bytes, TraceEncoding::V04)); + assert!(result.is_ok()); + result + }, + criterion::BatchSize::LargeInput, + ); + }, + ); +} + criterion_group!(deserialize_benches, deserialize_msgpack_to_internal); +criterion_group!( + name = deserialize_alloc_benches; + config = memory_allocated_measurement(&super::GLOBAL); + targets = deserialize_msgpack_to_internal_allocs +); diff --git a/libdd-trace-utils/benches/main.rs b/libdd-trace-utils/benches/main.rs index de08a58ae2..0d86f25ee5 100644 --- a/libdd-trace-utils/benches/main.rs +++ b/libdd-trace-utils/benches/main.rs @@ -1,12 +1,19 @@ // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use std::alloc::System; + use criterion::criterion_main; +use libdd_common::bench_utils::ReportingAllocator; + +#[global_allocator] +pub static GLOBAL: ReportingAllocator = ReportingAllocator::new(System); mod deserialization; mod serialization; criterion_main!( serialization::serialize_benches, - deserialization::deserialize_benches + deserialization::deserialize_benches, + deserialization::deserialize_alloc_benches );