Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions bin_tests/src/artifacts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ pub fn crashtracker_bin_test(profile: BuildProfile, panic_abort: bool) -> Artifa
}

/// Creates an ArtifactsBuild for the crashing_test_app binary.
#[cfg(not(target_os = "macos"))]
pub fn crashing_app(profile: BuildProfile, panic_abort: bool) -> ArtifactsBuild {
ArtifactsBuild {
name: "crashing_test_app".to_owned(),
Expand Down Expand Up @@ -90,15 +89,11 @@ pub fn all_prebuild_artifacts() -> Vec<ArtifactsBuild> {
artifacts.push(crashtracker_receiver(profile));
artifacts.push(test_the_tests(profile));
artifacts.push(profiling_ffi(profile));

#[cfg(not(target_os = "macos"))]
artifacts.push(crashing_app(profile, false));
}

// Panic abort variants (used by panic hook tests)
artifacts.push(crashtracker_bin_test(BuildProfile::Debug, true));

#[cfg(not(target_os = "macos"))]
artifacts.push(crashing_app(BuildProfile::Debug, true));

artifacts
Expand Down
15 changes: 4 additions & 11 deletions bin_tests/tests/crashtracker_bin_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,19 +423,16 @@ fn test_crash_tracking_errors_intake_uds_socket() {
/// DD_CRASHTRACK_END_STACKTRACE. Error: Can't set non-existant stack complete\n")
#[test]
#[cfg_attr(miri, ignore)]
#[cfg(not(target_os = "macos"))]
fn test_crash_tracking_bin_panic() {
test_crash_tracking_app("panic");
}

/// Runs the shared `test_crash_tracking_app` scenario with the `"segfault"` crash type.
///
/// Skipped under Miri via `cfg_attr(miri, ignore)`.
#[test]
#[cfg(not(target_os = "macos"))]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_segfault() {
test_crash_tracking_app("segfault");
}

#[cfg(not(target_os = "macos"))]
fn test_crash_tracking_app(crash_type: &str) {
use bin_tests::test_runner::run_custom_crash_test;

Expand Down Expand Up @@ -484,7 +481,6 @@ fn test_crash_tracking_app(crash_type: &str) {

#[test]
#[cfg_attr(miri, ignore)]
#[cfg(not(target_os = "macos"))] // Same restriction as other panic tests
fn test_crash_tracking_bin_panic_hook_after_fork() {
test_panic_hook_mode(
"panic_hook_after_fork",
Expand All @@ -495,14 +491,12 @@ fn test_crash_tracking_bin_panic_hook_after_fork() {

/// Runs `test_panic_hook_mode` in `"panic_hook_string"` mode, expecting the
/// `"message"` category and the panic message `"Panic with value: 42"`.
///
/// Skipped under Miri via `cfg_attr(miri, ignore)`.
#[test]
#[cfg_attr(miri, ignore)]
#[cfg(not(target_os = "macos"))] // Same restriction as other panic tests
fn test_crash_tracking_bin_panic_hook_string() {
test_panic_hook_mode("panic_hook_string", "message", Some("Panic with value: 42"));
}

#[test]
#[cfg_attr(miri, ignore)]
#[cfg(not(target_os = "macos"))] // Same restriction as other panic tests
fn test_crash_tracking_bin_panic_hook_unknown_type() {
test_panic_hook_mode(
"panic_hook_unknown_type",
Expand All @@ -513,7 +507,6 @@ fn test_crash_tracking_bin_panic_hook_unknown_type() {

/// Helper function to run panic hook tests with different payload types.
/// Note: Since tests are built with Debug profile, location is always expected.
#[cfg(not(target_os = "macos"))]
fn test_panic_hook_mode(mode: &str, expected_category: &str, expected_panic_message: Option<&str>) {
use bin_tests::test_runner::run_custom_crash_test;

Expand Down Expand Up @@ -580,20 +573,20 @@ fn test_panic_hook_mode(mode: &str, expected_category: &str, expected_panic_mess
// ====================================================================================
// These tests use `run_custom_crash_test` with the crashing_test_app artifact.

// This test is disabled for now on x86_64 musl and macos
// This test is disabled for now on x86_64 musl
// It seems that on aarch64 musl, libc has CFI which allows
// unwinding past the signal frame.
// Don't forget to update the ignore condition for this and also
// `test_crash_tracking_callstack` when this is revisited.
/// Test entry point that delegates to `test_crash_tracking_callstack`.
///
/// Skipped under Miri via `cfg_attr(miri, ignore)`; the `cfg` attributes below
/// must stay in sync with the ones on `test_crash_tracking_callstack`.
#[test]
#[cfg(not(any(all(target_arch = "x86_64", target_env = "musl"), target_os = "macos")))]
#[cfg(not(any(all(target_arch = "x86_64", target_env = "musl"))))]
#[cfg_attr(miri, ignore)]
fn test_crasht_tracking_validate_callstack() {
test_crash_tracking_callstack()
}

// This test is disabled for now on x86_64 musl and macos for the reason mentioned above.
#[cfg(not(any(all(target_arch = "x86_64", target_env = "musl"), target_os = "macos")))]
// This test is disabled for now on x86_64 musl for the reason mentioned above.
#[cfg(not(any(all(target_arch = "x86_64", target_env = "musl"))))]
fn test_crash_tracking_callstack() {
use bin_tests::test_runner::run_custom_crash_test;

Expand Down
135 changes: 131 additions & 4 deletions libdd-crashtracker/src/collector/emitters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,38 @@ unsafe fn emit_backtrace_by_frames(
w: &mut impl Write,
resolve_frames: StacktraceCollection,
fault_ip: usize,
ucontext: *const ucontext_t,
) -> Result<(), EmitterError> {
// https://docs.rs/backtrace/latest/backtrace/index.html
writeln!(w, "{DD_CRASHTRACK_BEGIN_STACKTRACE}")?;

// Absolute addresses appear to be safer to collect during a crash than debug info.
// On macOS, backtrace::trace_unsynchronized fails in forked children because
// macOS restricts many APIs after fork-without-exec. Walk the frame pointer
// chain directly from the saved ucontext registers instead. The parent's
// stack memory is still readable in the forked child
#[cfg(target_os = "macos")]
{
let _ = (resolve_frames, fault_ip);
emit_backtrace_from_ucontext(w, ucontext)?;
}

#[cfg(not(target_os = "macos"))]
{
let _ = ucontext;
emit_backtrace_via_library(w, resolve_frames, fault_ip)?;
}

writeln!(w, "{DD_CRASHTRACK_END_STACKTRACE}")?;
w.flush()?;
Ok(())
}

#[allow(dead_code)] // used from tests on macOS, from emit_backtrace_by_frames on other platforms
unsafe fn emit_backtrace_via_library(
w: &mut impl Write,
resolve_frames: StacktraceCollection,
fault_ip: usize,
) -> Result<(), EmitterError> {
fn emit_absolute_addresses(w: &mut impl Write, frame: &Frame) -> Result<(), EmitterError> {
write!(w, "\"ip\": \"{:?}\"", frame.ip())?;
if let Some(module_base_address) = frame.module_base_address() {
Expand Down Expand Up @@ -132,7 +159,103 @@ unsafe fn emit_backtrace_by_frames(
// emit anything at all, if the crashing frame is not found for some reason
ip_found = true;
}
writeln!(w, "{DD_CRASHTRACK_END_STACKTRACE}")?;
Ok(())
}

/// Emit the crashed thread's call stack by following saved frame pointers.
///
/// The walk starts from the program counter and frame pointer stored in the
/// ucontext's machine context. The first frame emitted is the saved PC; every
/// following frame is the return address read from a `[saved_fp, return_addr]`
/// frame record. This avoids depending on `backtrace::trace_unsynchronized`,
/// which uses macOS APIs that don't work in forked-but-not-exec'd children;
/// after fork() the child still sees the parent's stack memory, so the chain
/// from the crashed thread remains readable.
///
/// Each address is handed to `emit_frame_with_dladdr`, which resolves symbol
/// information through `dladdr`.
///
/// The walk stops when any of the following holds:
/// - the frame pointer is null or not pointer-aligned,
/// - the two-word frame record is not fully inside the current thread's stack,
/// - the return address is zero,
/// - the next frame pointer does not move strictly upward,
/// - `MAX_FRAMES` records have been visited.
///
/// A null `ucontext` or null machine context emits nothing and returns `Ok(())`.
///
/// # Safety
///
/// `ucontext` must be null or point to a readable `ucontext_t` whose
/// `uc_mcontext` is null or points to a readable machine context.
#[cfg(target_os = "macos")]
unsafe fn emit_backtrace_from_ucontext(
    w: &mut impl Write,
    ucontext: *const ucontext_t,
) -> Result<(), EmitterError> {
    const MAX_FRAMES: usize = 512;
    const WORD: usize = std::mem::size_of::<usize>();

    if ucontext.is_null() {
        return Ok(());
    }
    let mcontext = (*ucontext).uc_mcontext;
    if mcontext.is_null() {
        return Ok(());
    }

    // Stack limits of the current thread; frame records are only dereferenced
    // when they lie entirely inside [stack_lo, stack_hi).
    let this_thread = libc::pthread_self();
    let stack_hi = libc::pthread_get_stackaddr_np(this_thread) as usize;
    let stack_lo = stack_hi.saturating_sub(libc::pthread_get_stacksize_np(this_thread));

    // A frame record is two pointer-sized words: [saved_fp, return_addr].
    let record_on_stack =
        |addr: usize| -> bool { addr >= stack_lo && addr.saturating_add(2 * WORD) <= stack_hi };

    let regs = &(*mcontext).__ss;
    #[cfg(target_arch = "aarch64")]
    let (crash_pc, mut frame_ptr) = (regs.__pc as usize, regs.__fp as usize);
    #[cfg(target_arch = "x86_64")]
    let (crash_pc, mut frame_ptr) = (regs.__rip as usize, regs.__rbp as usize);

    // The faulting instruction itself is always the first frame.
    emit_frame_with_dladdr(w, crash_pc)?;

    let mut remaining = MAX_FRAMES;
    while remaining > 0 {
        remaining -= 1;

        let misaligned = frame_ptr % std::mem::align_of::<usize>() != 0;
        if frame_ptr == 0 || misaligned || !record_on_stack(frame_ptr) {
            break;
        }

        let record = frame_ptr as *const usize;
        let caller_fp = *record;
        let return_addr = *record.add(1);
        if return_addr == 0 {
            break;
        }
        emit_frame_with_dladdr(w, return_addr)?;

        // Frame pointers must move strictly upward; anything else would mean
        // a corrupted or cyclic chain.
        if caller_fp <= frame_ptr {
            break;
        }
        frame_ptr = caller_fp;
    }

    Ok(())
}

/// Emit a single stack frame as one JSON object line, enriched with `dladdr`
/// symbol information.
///
/// The line always carries `ip`. When `dladdr` resolves the address, the
/// non-null `module_base_address`, `symbol_address` and `function` fields are
/// appended. The symbol name is JSON-escaped before being written so that a
/// quote, backslash or control character in it cannot produce a malformed
/// frame line. Names that are not valid UTF-8 are omitted.
///
/// The writer is flushed after every frame.
///
/// # Safety
///
/// Calls `libc::dladdr` and reads the C string it returns in `dli_sname`.
/// `ip` is only used as a lookup key and is never dereferenced here.
#[cfg(target_os = "macos")]
unsafe fn emit_frame_with_dladdr(w: &mut impl Write, ip: usize) -> Result<(), EmitterError> {
    /// Writes `s` with `"`, `\` and control characters below U+0020 escaped for
    /// use inside a JSON string. Unescaped runs are written as whole slices, so
    /// nothing is allocated.
    fn write_json_escaped(w: &mut impl Write, s: &str) -> Result<(), EmitterError> {
        let mut run_start = 0;
        for (idx, ch) in s.char_indices() {
            let needs_escape = matches!(ch, '"' | '\\') || (ch as u32) < 0x20;
            if !needs_escape {
                continue;
            }
            // `idx` comes from `char_indices`, so both slice bounds are on
            // character boundaries.
            w.write_all(s[run_start..idx].as_bytes())?;
            match ch {
                '"' => w.write_all(b"\\\"")?,
                '\\' => w.write_all(b"\\\\")?,
                _ => write!(w, "\\u{:04x}", ch as u32)?,
            }
            run_start = idx + ch.len_utf8();
        }
        w.write_all(s[run_start..].as_bytes())?;
        Ok(())
    }

    let mut info: libc::Dl_info = std::mem::zeroed();
    let resolved = libc::dladdr(ip as *const libc::c_void, &mut info) != 0;

    write!(w, "{{\"ip\": \"0x{ip:x}\"")?;

    if resolved {
        if !info.dli_fbase.is_null() {
            write!(w, ", \"module_base_address\": \"{:?}\"", info.dli_fbase)?;
        }
        if !info.dli_saddr.is_null() {
            write!(w, ", \"symbol_address\": \"{:?}\"", info.dli_saddr)?;
        }
        if !info.dli_sname.is_null() {
            let name = std::ffi::CStr::from_ptr(info.dli_sname);
            if let Ok(s) = name.to_str() {
                write!(w, ", \"function\": \"")?;
                write_json_escaped(w, s)?;
                write!(w, "\"")?;
            }
        }
    }

    writeln!(w, "}}")?;
    w.flush()?;
    Ok(())
}
Expand Down Expand Up @@ -213,7 +336,9 @@ pub(crate) fn emit_crashreport(
// https://doc.rust-lang.org/src/std/backtrace.rs.html#332
// We do this last, so even if it crashes, we still get the other info.
let fault_ip = extract_ip(ucontext);
unsafe { emit_backtrace_by_frames(pipe, config.resolve_frames(), fault_ip)? };
unsafe {
emit_backtrace_by_frames(pipe, config.resolve_frames(), fault_ip, ucontext)?
};
}
if is_runtime_callback_registered() {
emit_runtime_stack(pipe)?;
Expand Down Expand Up @@ -501,9 +626,11 @@ mod tests {
})
};
let mut buf = Vec::new();
writeln!(buf, "{DD_CRASHTRACK_BEGIN_STACKTRACE}").unwrap();
unsafe {
emit_backtrace_by_frames(&mut buf, collection, ip_of_test_fn).expect("to work ;-)");
emit_backtrace_via_library(&mut buf, collection, ip_of_test_fn).expect("to work ;-)");
}
writeln!(buf, "{DD_CRASHTRACK_END_STACKTRACE}").unwrap();
buf
}

Expand Down
11 changes: 10 additions & 1 deletion libdd-crashtracker/src/receiver/entry_points.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/

use super::receive_report::receive_report_from_stream;
use crate::{crash_info::CrashInfo, CrashtrackerConfiguration, StacktraceCollection};
use crate::crash_info::CrashInfo;
use crate::CrashtrackerConfiguration;
#[cfg(target_os = "linux")]
use crate::StacktraceCollection;
use anyhow::Context;
use std::time::Duration;
use tokio::{
Expand Down Expand Up @@ -133,6 +136,10 @@ fn resolve_frames(
config: &CrashtrackerConfiguration,
crash_info: &mut CrashInfo,
) -> anyhow::Result<()> {
// enrich_callstacks uses blazesym's normalize_user_addrs (reads /proc/<pid>/maps)
// and assumes ELF binaries. Both are Linux-specific; macOS has no procfs and
// uses Mach-O binaries.
#[cfg(target_os = "linux")]
if config.resolve_frames() == StacktraceCollection::EnabledWithSymbolsInReceiver {
let pid = crash_info
.proc_info
Expand All @@ -141,5 +148,7 @@ fn resolve_frames(
.pid;
crash_info.enrich_callstacks(pid)?;
}
#[cfg(not(target_os = "linux"))]
let _ = (config, crash_info);
Ok(())
}
Loading