diff --git a/bin_tests/src/artifacts.rs b/bin_tests/src/artifacts.rs index 3f5341e4d9..36fc5a3bb8 100644 --- a/bin_tests/src/artifacts.rs +++ b/bin_tests/src/artifacts.rs @@ -29,7 +29,6 @@ pub fn crashtracker_bin_test(profile: BuildProfile, panic_abort: bool) -> Artifa } /// Creates an ArtifactsBuild for the crashing_test_app binary. -#[cfg(not(target_os = "macos"))] pub fn crashing_app(profile: BuildProfile, panic_abort: bool) -> ArtifactsBuild { ArtifactsBuild { name: "crashing_test_app".to_owned(), @@ -90,15 +89,11 @@ pub fn all_prebuild_artifacts() -> Vec { artifacts.push(crashtracker_receiver(profile)); artifacts.push(test_the_tests(profile)); artifacts.push(profiling_ffi(profile)); - - #[cfg(not(target_os = "macos"))] artifacts.push(crashing_app(profile, false)); } // Panic abort variants (used by panic hook tests) artifacts.push(crashtracker_bin_test(BuildProfile::Debug, true)); - - #[cfg(not(target_os = "macos"))] artifacts.push(crashing_app(BuildProfile::Debug, true)); artifacts diff --git a/bin_tests/tests/crashtracker_bin_test.rs b/bin_tests/tests/crashtracker_bin_test.rs index 20c36e2c2c..2e8f2d2e98 100644 --- a/bin_tests/tests/crashtracker_bin_test.rs +++ b/bin_tests/tests/crashtracker_bin_test.rs @@ -423,19 +423,16 @@ fn test_crash_tracking_errors_intake_uds_socket() { /// DD_CRASHTRACK_END_STACKTRACE. Error: Can't set non-existant stack complete\n") #[test] #[cfg_attr(miri, ignore)] -#[cfg(not(target_os = "macos"))] fn test_crash_tracking_bin_panic() { test_crash_tracking_app("panic"); } #[test] -#[cfg(not(target_os = "macos"))] #[cfg_attr(miri, ignore)] fn test_crash_tracking_bin_segfault() { test_crash_tracking_app("segfault"); } -#[cfg(not(target_os = "macos"))] fn test_crash_tracking_app(crash_type: &str) { use bin_tests::test_runner::run_custom_crash_test; @@ -484,7 +481,6 @@ fn test_crash_tracking_app(crash_type: &str) { #[test] #[cfg_attr(miri, ignore)] -#[cfg(not(target_os = "macos"))] // Same restriction as other panic tests fn test_crash_tracking_bin_panic_hook_after_fork() { test_panic_hook_mode( "panic_hook_after_fork", @@ -495,14 +491,12 @@ fn test_crash_tracking_bin_panic_hook_after_fork() { #[test] #[cfg_attr(miri, ignore)] -#[cfg(not(target_os = "macos"))] // Same restriction as other panic tests fn test_crash_tracking_bin_panic_hook_string() { test_panic_hook_mode("panic_hook_string", "message", Some("Panic with value: 42")); } #[test] #[cfg_attr(miri, ignore)] -#[cfg(not(target_os = "macos"))] // Same restriction as other panic tests fn test_crash_tracking_bin_panic_hook_unknown_type() { test_panic_hook_mode( "panic_hook_unknown_type", @@ -513,7 +507,6 @@ fn test_crash_tracking_bin_panic_hook_unknown_type() { /// Helper function to run panic hook tests with different payload types. /// Note: Since tests are built with Debug profile, location is always expected. -#[cfg(not(target_os = "macos"))] fn test_panic_hook_mode(mode: &str, expected_category: &str, expected_panic_message: Option<&str>) { use bin_tests::test_runner::run_custom_crash_test; @@ -580,20 +573,20 @@ fn test_panic_hook_mode(mode: &str, expected_category: &str, expected_panic_mess // ==================================================================================== // These tests use `run_custom_crash_test` with the crashing_test_app artifact. -// This test is disabled for now on x86_64 musl and macos +// This test is disabled for now on x86_64 musl // It seems that on aarch64 musl, libc has CFI which allows // unwinding passed the signal frame. // Don't forget to update the ignore condition for this and also // `test_crash_tracking_callstack` when this is revisited. #[test] -#[cfg(not(any(all(target_arch = "x86_64", target_env = "musl"), target_os = "macos")))] +#[cfg(not(any(all(target_arch = "x86_64", target_env = "musl"))))] #[cfg_attr(miri, ignore)] fn test_crasht_tracking_validate_callstack() { test_crash_tracking_callstack() } -// This test is disabled for now on x86_64 musl and macos for the reason mentioned above. -#[cfg(not(any(all(target_arch = "x86_64", target_env = "musl"), target_os = "macos")))] +// This test is disabled for now on x86_64 musl for the reason mentioned above. +#[cfg(not(any(all(target_arch = "x86_64", target_env = "musl"))))] fn test_crash_tracking_callstack() { use bin_tests::test_runner::run_custom_crash_test; diff --git a/libdd-crashtracker/src/collector/emitters.rs b/libdd-crashtracker/src/collector/emitters.rs index 9b4b09b85e..379edfeb74 100644 --- a/libdd-crashtracker/src/collector/emitters.rs +++ b/libdd-crashtracker/src/collector/emitters.rs @@ -55,11 +55,38 @@ unsafe fn emit_backtrace_by_frames( w: &mut impl Write, resolve_frames: StacktraceCollection, fault_ip: usize, + ucontext: *const ucontext_t, ) -> Result<(), EmitterError> { // https://docs.rs/backtrace/latest/backtrace/index.html writeln!(w, "{DD_CRASHTRACK_BEGIN_STACKTRACE}")?; - // Absolute addresses appear to be safer to collect during a crash than debug info. + // On macOS, backtrace::trace_unsynchronized fails in forked children because + // macOS restricts many APIs after fork-without-exec. Walk the frame pointer + // chain directly from the saved ucontext registers instead. The parent's + // stack memory is still readable in the forked child + #[cfg(target_os = "macos")] + { + let _ = (resolve_frames, fault_ip); + emit_backtrace_from_ucontext(w, ucontext)?; + } + + #[cfg(not(target_os = "macos"))] + { + let _ = ucontext; + emit_backtrace_via_library(w, resolve_frames, fault_ip)?; + } + + writeln!(w, "{DD_CRASHTRACK_END_STACKTRACE}")?; + w.flush()?; + Ok(()) +} + +#[allow(dead_code)] // used from tests on macOS, from emit_backtrace_by_frames on other platforms +unsafe fn emit_backtrace_via_library( + w: &mut impl Write, + resolve_frames: StacktraceCollection, + fault_ip: usize, +) -> Result<(), EmitterError> { fn emit_absolute_addresses(w: &mut impl Write, frame: &Frame) -> Result<(), EmitterError> { write!(w, "\"ip\": \"{:?}\"", frame.ip())?; if let Some(module_base_address) = frame.module_base_address() { @@ -132,7 +159,103 @@ unsafe fn emit_backtrace_by_frames( // emit anything at all, if the crashing frame is not found for some reason ip_found = true; } - writeln!(w, "{DD_CRASHTRACK_END_STACKTRACE}")?; + Ok(()) +} + +/// Walk the frame pointer chain from the ucontext's saved registers. +/// +/// After fork(), the child process has a copy-on-write view of the parent's +/// stack memory, so the frame pointer chain from the crashed thread is still +/// readable. This avoids depending on `backtrace::trace_unsynchronized` which +/// uses macOS APIs that don't work in forked-but-not-exec'd children. +/// +/// For each IP we call `dladdr` to resolve the symbol name, symbol address, +/// and containing shared-object path. `dladdr` is safe here because it only +/// reads dyld's internal data structures (no allocation, no Mach IPC). +#[cfg(target_os = "macos")] +unsafe fn emit_backtrace_from_ucontext( + w: &mut impl Write, + ucontext: *const ucontext_t, +) -> Result<(), EmitterError> { + if ucontext.is_null() { + return Ok(()); + } + let mcontext = (*ucontext).uc_mcontext; + if mcontext.is_null() { + return Ok(()); + } + + // Get the thread's stack bounds so we only deref frame pointers + // that lie within known stack memory. Both pthread_get_stackaddr_np and + // pthread_get_stacksize_np are async-signal-safe on macOS + let thread = libc::pthread_self(); + let stack_top = libc::pthread_get_stackaddr_np(thread) as usize; + let stack_size = libc::pthread_get_stacksize_np(thread); + let stack_bottom = stack_top.saturating_sub(stack_size); + + // Returns true when the range [addr, addr+len) falls within the thread stack + let in_stack_bounds = |addr: usize, len: usize| -> bool { + let end = addr.saturating_add(len); + addr >= stack_bottom && end <= stack_top + }; + + let ss = &(*mcontext).__ss; + #[cfg(target_arch = "aarch64")] + let (pc, mut fp) = (ss.__pc as usize, ss.__fp as usize); + #[cfg(target_arch = "x86_64")] + let (pc, mut fp) = (ss.__rip as usize, ss.__rbp as usize); + + emit_frame_with_dladdr(w, pc)?; + + const MAX_FRAMES: usize = 512; + for _ in 0..MAX_FRAMES { + if fp == 0 || fp % std::mem::align_of::() != 0 { + break; + } + // Each frame record is two pointer-sized words: [saved_fp, return_addr] + // Bail out if the record falls outside the thread stack + if !in_stack_bounds(fp, 2 * std::mem::size_of::()) { + break; + } + let next_fp = *(fp as *const usize); + let return_addr = *((fp + std::mem::size_of::()) as *const usize); + if return_addr == 0 { + break; + } + emit_frame_with_dladdr(w, return_addr)?; + if next_fp <= fp { + break; + } + fp = next_fp; + } + + Ok(()) +} + +/// Emit a single stack frame, enriched with `dladdr` symbol information. +#[cfg(target_os = "macos")] +unsafe fn emit_frame_with_dladdr(w: &mut impl Write, ip: usize) -> Result<(), EmitterError> { + let mut info: libc::Dl_info = std::mem::zeroed(); + let resolved = libc::dladdr(ip as *const libc::c_void, &mut info) != 0; + + write!(w, "{{\"ip\": \"0x{ip:x}\"")?; + + if resolved { + if !info.dli_fbase.is_null() { + write!(w, ", \"module_base_address\": \"{:?}\"", info.dli_fbase)?; + } + if !info.dli_saddr.is_null() { + write!(w, ", \"symbol_address\": \"{:?}\"", info.dli_saddr)?; + } + if !info.dli_sname.is_null() { + let name = std::ffi::CStr::from_ptr(info.dli_sname); + if let Ok(s) = name.to_str() { + write!(w, ", \"function\": \"{s}\"")?; + } + } + } + + writeln!(w, "}}")?; w.flush()?; Ok(()) } @@ -213,7 +336,9 @@ pub(crate) fn emit_crashreport( // https://doc.rust-lang.org/src/std/backtrace.rs.html#332 // We do this last, so even if it crashes, we still get the other info. let fault_ip = extract_ip(ucontext); - unsafe { emit_backtrace_by_frames(pipe, config.resolve_frames(), fault_ip)? }; + unsafe { + emit_backtrace_by_frames(pipe, config.resolve_frames(), fault_ip, ucontext)? + }; } if is_runtime_callback_registered() { emit_runtime_stack(pipe)?; @@ -501,9 +626,11 @@ mod tests { }) }; let mut buf = Vec::new(); + writeln!(buf, "{DD_CRASHTRACK_BEGIN_STACKTRACE}").unwrap(); unsafe { - emit_backtrace_by_frames(&mut buf, collection, ip_of_test_fn).expect("to work ;-)"); + emit_backtrace_via_library(&mut buf, collection, ip_of_test_fn).expect("to work ;-)"); } + writeln!(buf, "{DD_CRASHTRACK_END_STACKTRACE}").unwrap(); buf } diff --git a/libdd-crashtracker/src/receiver/entry_points.rs b/libdd-crashtracker/src/receiver/entry_points.rs index 5ed5f27e44..532c580cb4 100644 --- a/libdd-crashtracker/src/receiver/entry_points.rs +++ b/libdd-crashtracker/src/receiver/entry_points.rs @@ -2,7 +2,10 @@ // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ use super::receive_report::receive_report_from_stream; -use crate::{crash_info::CrashInfo, CrashtrackerConfiguration, StacktraceCollection}; +use crate::crash_info::CrashInfo; +use crate::CrashtrackerConfiguration; +#[cfg(target_os = "linux")] +use crate::StacktraceCollection; use anyhow::Context; use std::time::Duration; use tokio::{ @@ -133,6 +136,10 @@ fn resolve_frames( config: &CrashtrackerConfiguration, crash_info: &mut CrashInfo, ) -> anyhow::Result<()> { + // enrich_callstacks uses blazesym's normalize_user_addrs (reads /proc//maps) + // and assumes ELF binaries. Both are Linux-specific; macOS has no procfs and + // uses Mach-O binaries. + #[cfg(target_os = "linux")] if config.resolve_frames() == StacktraceCollection::EnabledWithSymbolsInReceiver { let pid = crash_info .proc_info @@ -141,5 +148,7 @@ fn resolve_frames( .pid; crash_info.enrich_callstacks(pid)?; } + #[cfg(not(target_os = "linux"))] + let _ = (config, crash_info); Ok(()) }