diff --git a/bin_tests/src/bin/crashtracker_bin_test.rs b/bin_tests/src/bin/crashtracker_bin_test.rs index 6ab95b6512..a9c3f379db 100644 --- a/bin_tests/src/bin/crashtracker_bin_test.rs +++ b/bin_tests/src/bin/crashtracker_bin_test.rs @@ -25,6 +25,7 @@ mod unix { use libdd_common::{tag, Endpoint}; use libdd_crashtracker::{ self as crashtracker, CrashtrackerConfiguration, CrashtrackerReceiverConfig, Metadata, + StackFrame, StackTrace, }; const TEST_COLLECTOR_TIMEOUT: Duration = Duration::from_secs(15); @@ -154,6 +155,31 @@ mod unix { "raise_sigill" => raise(Signal::SIGILL)?, "raise_sigbus" => raise(Signal::SIGBUS)?, "raise_sigsegv" => raise(Signal::SIGSEGV)?, + "unhandled_exception" => { + let mut stacktrace = StackTrace::new_incomplete(); + let mut stackframe1 = StackFrame::new(); + stackframe1.with_ip(1234); + stackframe1.with_function("test_function1".to_string()); + stackframe1.with_file("test_file1".to_string()); + + let mut stackframe2 = StackFrame::new(); + stackframe2.with_ip(5678); + stackframe2.with_function("test_function2".to_string()); + stackframe2.with_file("test_file2".to_string()); + + stacktrace.push_frame(stackframe1, true).unwrap(); + stacktrace.push_frame(stackframe2, true).unwrap(); + + stacktrace.set_complete().unwrap(); + + crashtracker::report_unhandled_exception( + Some("RuntimeException"), + Some("an exception occured"), + stacktrace, + )?; + + process::exit(0); + } _ => anyhow::bail!("Unexpected crash_typ: {crash_typ}"), } crashtracker::end_op(crashtracker::OpTypes::ProfilerCollectingSample)?; diff --git a/bin_tests/src/test_types.rs b/bin_tests/src/test_types.rs index 1666264f77..ec3faf235c 100644 --- a/bin_tests/src/test_types.rs +++ b/bin_tests/src/test_types.rs @@ -114,6 +114,8 @@ pub enum CrashType { RaiseSigBus, /// Raise SIGSEGV RaiseSigSegv, + /// Unhandled Exception + UnhandledException, } impl CrashType { @@ -129,6 +131,7 @@ impl CrashType { Self::RaiseSigIll => "raise_sigill", Self::RaiseSigBus => "raise_sigbus", 
Self::RaiseSigSegv => "raise_sigsegv", + Self::UnhandledException => "unhandled_exception", } } @@ -138,7 +141,11 @@ impl CrashType { pub const fn expects_success(self) -> bool { matches!( self, - Self::KillSigBus | Self::KillSigSegv | Self::RaiseSigBus | Self::RaiseSigSegv + Self::KillSigBus + | Self::KillSigSegv + | Self::RaiseSigBus + | Self::RaiseSigSegv + | Self::UnhandledException ) } @@ -150,6 +157,7 @@ impl CrashType { Self::KillSigAbrt | Self::RaiseSigAbrt => 6, // SIGABRT Self::KillSigIll | Self::RaiseSigIll => 4, // SIGILL Self::KillSigBus | Self::RaiseSigBus => 7, // SIGBUS + Self::UnhandledException => 0, // no signal } } @@ -160,6 +168,7 @@ impl CrashType { Self::KillSigAbrt | Self::RaiseSigAbrt => "SIGABRT", Self::KillSigIll | Self::RaiseSigIll => "SIGILL", Self::KillSigBus | Self::RaiseSigBus => "SIGBUS", + Self::UnhandledException => "Unhandled Exception", } } } @@ -184,6 +193,7 @@ impl std::str::FromStr for CrashType { "raise_sigill" => Ok(Self::RaiseSigIll), "raise_sigbus" => Ok(Self::RaiseSigBus), "raise_sigsegv" => Ok(Self::RaiseSigSegv), + "unhandled_exception" => Ok(Self::UnhandledException), _ => Err(format!("Unknown crash type: {}", s)), } } @@ -220,5 +230,6 @@ mod tests { assert!(!CrashType::KillSigAbrt.expects_success()); assert!(CrashType::KillSigBus.expects_success()); assert!(CrashType::KillSigSegv.expects_success()); + assert!(CrashType::UnhandledException.expects_success()); } } diff --git a/bin_tests/tests/crashtracker_bin_test.rs b/bin_tests/tests/crashtracker_bin_test.rs index 5e0683a03b..20c36e2c2c 100644 --- a/bin_tests/tests/crashtracker_bin_test.rs +++ b/bin_tests/tests/crashtracker_bin_test.rs @@ -96,6 +96,42 @@ fn run_standard_crash_test_refactored( // These tests below use the new infrastructure but require custom validation logic // that doesn't fit the simple macro-generated pattern. 
+#[test] +#[cfg_attr(miri, ignore)] +fn test_crash_tracking_bin_unhandled_exception() { + let config = CrashTestConfig::new( + BuildProfile::Release, + TestMode::DoNothing, + CrashType::UnhandledException, + ); + let artifacts = StandardArtifacts::new(config.profile); + let artifacts_map = build_artifacts(&artifacts.as_slice()).unwrap(); + + let validator: ValidatorFn = Box::new(|payload, fixtures| { + PayloadValidator::new(payload) + .validate_counters()? + .validate_error_kind("UnhandledException")? + .validate_error_message_contains("Process was terminated due to an unhandled exception of type 'RuntimeException'. Message: an exception occured")? + // The two frames emitted in the bin: test_function1 and test_function2 + .validate_callstack_functions(&["test_function1", "test_function2"])?; + + // Unhandled exceptions have no signal info + let sig_info = &payload["sig_info"]; + assert!( + sig_info.is_null() + || sig_info.is_object() && sig_info.as_object().is_none_or(|m| m.is_empty()), + "Expected no sig_info for unhandled exception, got: {sig_info:?}" + ); + + // Validate rest of telemetry + validate_telemetry(&fixtures.crash_telemetry_path, "unhandled_exception")?; + + Ok(()) + }); + + run_crash_test_with_artifacts(&config, &artifacts_map, &artifacts, validator).unwrap(); +} + #[test] #[cfg_attr(miri, ignore)] fn test_crash_tracking_bin_runtime_callback_frame() { @@ -1027,6 +1063,12 @@ fn assert_siginfo_message(sig_info: &Value, crash_typ: &str) { assert_eq!(sig_info["si_signo"], libc::SIGILL); assert_eq!(sig_info["si_signo_human_readable"], "SIGILL"); } + "unhandled_exception" => { + assert!( + sig_info.is_null() + || sig_info.is_object() && sig_info.as_object().is_none_or(|m| m.is_empty()) + ); + } _ => panic!("unexpected crash_typ {crash_typ}"), } } @@ -1101,9 +1143,10 @@ fn assert_telemetry_message(crash_telemetry: &[u8], crash_typ: &str) { "profiler_unwinding:0".to_string(), ]); + assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); + match crash_typ 
{ "null_deref" => { - assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); assert!(tags.contains("si_addr:0x0000000000000000"), "{tags:?}"); assert!( tags.contains("si_code_human_readable:SEGV_ACCERR") @@ -1118,17 +1161,14 @@ fn assert_telemetry_message(crash_telemetry: &[u8], crash_typ: &str) { ); } "kill_sigabrt" => { - assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); assert!(tags.contains("si_signo_human_readable:SIGABRT"), "{tags:?}"); assert!(tags.contains("si_signo:6"), "{tags:?}"); } "kill_sigill" => { - assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); assert!(tags.contains("si_signo_human_readable:SIGILL"), "{tags:?}"); assert!(tags.contains("si_signo:4"), "{tags:?}"); } "kill_sigbus" => { - assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); assert!(tags.contains("si_signo_human_readable:SIGBUS"), "{tags:?}"); // SIGBUS can be 7 or 10, depending on the os. assert!( @@ -1137,22 +1177,18 @@ fn assert_telemetry_message(crash_telemetry: &[u8], crash_typ: &str) { ); } "kill_sigsegv" => { - assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); assert!(tags.contains("si_signo_human_readable:SIGSEGV"), "{tags:?}"); assert!(tags.contains("si_signo:11"), "{tags:?}"); } "raise_sigabrt" => { - assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); assert!(tags.contains("si_signo_human_readable:SIGABRT"), "{tags:?}"); assert!(tags.contains("si_signo:6"), "{tags:?}"); } "raise_sigill" => { - assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); assert!(tags.contains("si_signo_human_readable:SIGILL"), "{tags:?}"); assert!(tags.contains("si_signo:4"), "{tags:?}"); } "raise_sigbus" => { - assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); assert!(tags.contains("si_signo_human_readable:SIGBUS"), "{tags:?}"); // SIGBUS can be 7 or 10, depending on the os. 
assert!( @@ -1161,10 +1197,12 @@ fn assert_telemetry_message(crash_telemetry: &[u8], crash_typ: &str) { ); } "raise_sigsegv" => { - assert!(base_expected_tags.is_subset(&tags), "{tags:?}"); assert!(tags.contains("si_signo_human_readable:SIGSEGV"), "{tags:?}"); assert!(tags.contains("si_signo:11"), "{tags:?}"); } + "unhandled_exception" => { + // Unhandled exceptions have no signal info tags + } _ => panic!("{crash_typ}"), } diff --git a/examples/ffi/CMakeLists.txt b/examples/ffi/CMakeLists.txt index c07e7f187a..b1f9e9820f 100644 --- a/examples/ffi/CMakeLists.txt +++ b/examples/ffi/CMakeLists.txt @@ -65,6 +65,9 @@ set_vcruntime_link_type(telemetry_metrics ${VCRUNTIME_LINK_TYPE}) if(NOT WIN32) add_executable(crashtracking crashtracking.c) target_link_libraries(crashtracking PRIVATE Datadog::Profiling) + + add_executable(crashtracking_unhandled_exception crashtracking_unhandled_exception.c) + target_link_libraries(crashtracking_unhandled_exception PRIVATE Datadog::Profiling) endif() add_executable(trace_exporter trace_exporter.c) diff --git a/examples/ffi/crashtracking_unhandled_exception.c b/examples/ffi/crashtracking_unhandled_exception.c new file mode 100644 index 0000000000..fab38d5e10 --- /dev/null +++ b/examples/ffi/crashtracking_unhandled_exception.c @@ -0,0 +1,185 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 +// +// FFI test for ddog_crasht_report_unhandled_exception. +// +// This test initializes the crashtracker (without a live signal handler), +// builds a small runtime StackTrace, calls report_unhandled_exception, and +// verifies that a crash report file is produced in the current directory. +// +// Usage: +// crashtracking_unhandled_exception [receiver_binary_path] +// +// The receiver binary path may also be supplied via the +// DDOG_CRASHT_TEST_RECEIVER environment variable. When run through +// `cargo ffi-test` the variable is set automatically. 
+ +#include <datadog/common.h> +#include <datadog/crashtracker.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static ddog_CharSlice slice(const char *s) { + return (ddog_CharSlice){.ptr = s, .len = strlen(s)}; +} + +static void handle_void(ddog_VoidResult result, const char *ctx) { + if (result.tag != DDOG_VOID_RESULT_OK) { + ddog_CharSlice msg = ddog_Error_message(&result.err); + fprintf(stderr, "FAIL [%s]: %.*s\n", ctx, (int)msg.len, msg.ptr); + ddog_Error_drop(&result.err); + exit(EXIT_FAILURE); + } +} + +static void push_named_frame(ddog_crasht_Handle_StackTrace *trace, + const char *function_name, uintptr_t ip) { + ddog_crasht_StackFrame_NewResult fr = ddog_crasht_StackFrame_new(); + if (fr.tag != DDOG_CRASHT_STACK_FRAME_NEW_RESULT_OK) { + ddog_CharSlice msg = ddog_Error_message(&fr.err); + fprintf(stderr, "FAIL [StackFrame_new]: %.*s\n", (int)msg.len, msg.ptr); + ddog_Error_drop(&fr.err); + exit(EXIT_FAILURE); + } + + ddog_crasht_Handle_StackFrame *frame = + (ddog_crasht_Handle_StackFrame *)malloc(sizeof(*frame)); + if (!frame) { + fputs("FAIL [malloc frame]\n", stderr); + exit(EXIT_FAILURE); + } + *frame = fr.ok; + + handle_void(ddog_crasht_StackFrame_with_function(frame, slice(function_name)), + "StackFrame_with_function"); + if (ip != 0) { + handle_void(ddog_crasht_StackFrame_with_ip(frame, ip), + "StackFrame_with_ip"); + } + + /* push_frame consumes the frame */ + handle_void(ddog_crasht_StackTrace_push_frame(trace, frame, /*incomplete=*/true), + "StackTrace_push_frame"); + free(frame); +} + +// Entry point +int main(int argc, char **argv) { + const char *receiver_path = NULL; + if (argc >= 2) { + receiver_path = argv[1]; + } else { + receiver_path = getenv("DDOG_CRASHT_TEST_RECEIVER"); + } + if (!receiver_path || receiver_path[0] == '\0') { + fputs("FAIL: receiver binary path not provided.\n" + " Pass it as argv[1] or set DDOG_CRASHT_TEST_RECEIVER.\n", + stderr); + return EXIT_FAILURE; + } + + static const char output_file[] = "crashreport_unhandled_exception.json"; + static const char stderr_file[] = 
"crashreport_unhandled_exception.stderr"; + static const char stdout_file[] = "crashreport_unhandled_exception.stdout"; + + // Forward the dynamic-linker search path to the receiver process. + // The receiver is execve'd with an explicit environment so it does not + // inherit the parent's env automatically. The variable name differs by OS: + // Linux / ELF → LD_LIBRARY_PATH + // macOS → DYLD_LIBRARY_PATH +#ifdef __APPLE__ + const char *ld_search_path_var = "DYLD_LIBRARY_PATH"; +#else + const char *ld_search_path_var = "LD_LIBRARY_PATH"; +#endif + const char *ld_library_path = getenv(ld_search_path_var); + ddog_crasht_EnvVar env_vars[1]; + ddog_crasht_Slice_EnvVar env_slice = {.ptr = NULL, .len = 0}; + if (ld_library_path && ld_library_path[0] != '\0') { + env_vars[0].key = slice(ld_search_path_var); + env_vars[0].val = slice(ld_library_path); + env_slice.ptr = env_vars; + env_slice.len = 1; + } + + ddog_crasht_ReceiverConfig receiver_config = { + .path_to_receiver_binary = slice(receiver_path), + .optional_stderr_filename = slice(stderr_file), + .optional_stdout_filename = slice(stdout_file), + .env = env_slice, + }; + + struct ddog_Endpoint *endpoint = + ddog_endpoint_from_filename(slice(output_file)); + + struct ddog_crasht_Slice_CInt signals = ddog_crasht_default_signals(); + ddog_crasht_Config config = { + .create_alt_stack = false, + .endpoint = endpoint, + .resolve_frames = DDOG_CRASHT_STACKTRACE_COLLECTION_DISABLED, + .signals = {.ptr = signals.ptr, .len = signals.len}, + }; + + ddog_crasht_Metadata metadata = { + .library_name = slice("crashtracking-ffi-test"), + .library_version = slice("0.0.0"), + .family = slice("native"), + .tags = NULL, + }; + + handle_void(ddog_crasht_init(config, receiver_config, metadata), + "ddog_crasht_init"); + ddog_endpoint_drop(endpoint); + + // Build a runtime StackTrace with two synthetic frames. 
+ ddog_crasht_StackTrace_NewResult tr = ddog_crasht_StackTrace_new(); + if (tr.tag != DDOG_CRASHT_STACK_TRACE_NEW_RESULT_OK) { + ddog_CharSlice msg = ddog_Error_message(&tr.err); + fprintf(stderr, "FAIL [StackTrace_new]: %.*s\n", (int)msg.len, msg.ptr); + ddog_Error_drop(&tr.err); + return EXIT_FAILURE; + } + + ddog_crasht_Handle_StackTrace *trace = + (ddog_crasht_Handle_StackTrace *)malloc(sizeof(*trace)); + if (!trace) { + fputs("FAIL [malloc trace]\n", stderr); + return EXIT_FAILURE; + } + *trace = tr.ok; + + push_named_frame(trace, "com.example.MyApp.processRequest", 0x1000); + push_named_frame(trace, "com.example.runtime.EventLoop.run", 0x2000); + push_named_frame(trace, "com.example.runtime.main", 0x3000); + + handle_void(ddog_crasht_StackTrace_set_complete(trace), + "StackTrace_set_complete"); + + // Report the unhandled exception. This call: + // - spawns the receiver process, + // - sends the crash report over the socket, + // - waits for the receiver to finish writing the report, + // - returns Ok on success. + handle_void( + ddog_crasht_report_unhandled_exception( + slice("com.example.UncaughtRuntimeException"), + slice("Something went very wrong in the runtime"), + trace), + "ddog_crasht_report_unhandled_exception"); + + free(trace); + + // Verify a report file was produced. 
+ FILE *f = fopen(output_file, "r"); + if (!f) { + fprintf(stderr, "FAIL: expected crash report at '%s' but file not found\n", + output_file); + return EXIT_FAILURE; + } + fclose(f); + + printf("PASS: crash report written to '%s'\n", output_file); + return EXIT_SUCCESS; +} + diff --git a/libdd-crashtracker-ffi/src/collector/mod.rs b/libdd-crashtracker-ffi/src/collector/mod.rs index 9cde708105..e83cddd4fb 100644 --- a/libdd-crashtracker-ffi/src/collector/mod.rs +++ b/libdd-crashtracker-ffi/src/collector/mod.rs @@ -10,8 +10,9 @@ pub use additional_tags::*; pub use counters::*; pub use datatypes::*; use function_name::named; -use libdd_common_ffi::{wrap_with_void_ffi_result, Slice, VoidResult}; -use libdd_crashtracker::{CrashtrackerReceiverConfig, DEFAULT_SYMBOLS}; +use libdd_common_ffi::slice::AsBytes; +use libdd_common_ffi::{wrap_with_void_ffi_result, CharSlice, Handle, Slice, ToInner, VoidResult}; +use libdd_crashtracker::{CrashtrackerReceiverConfig, StackTrace, DEFAULT_SYMBOLS}; pub use spans::*; #[no_mangle] @@ -177,3 +178,56 @@ pub unsafe extern "C" fn ddog_crasht_init_without_receiver( pub extern "C" fn ddog_crasht_default_signals() -> Slice<'static, libc::c_int> { Slice::new(&DEFAULT_SYMBOLS) } + +#[no_mangle] +#[must_use] +#[named] +/// Report an unhandled exception as a crash event. +/// +/// This function sends a crash report for an unhandled exception detected +/// by the runtime. It is intended to be called when the process is in a +/// terminal state due to an unhandled exception. +/// +/// # Parameters +/// - `error_type`: Optional type/class of the exception (e.g. "NullPointerException"). Pass empty +/// CharSlice for unknown. +/// - `error_message`: Optional error message. Pass empty CharSlice for no message. +/// - `runtime_stack`: Stack trace from the runtime. Consumed by this call. +/// +/// If the crash-tracker has not been initialized, no report is sent and an error is returned. 
+/// +/// # Side effects +/// This function disables the signal-based crash handler before performing +/// any work. This means that if the process receives a fatal signal (e.g. SIGSEGV) +/// during or after this call, the crashtracker will not produce a +/// second crash report. The previous signal handler (if any) will still be +/// chained. +/// +/// # Failure mode +/// If a fatal signal occurs while this function is in progress, the calling +/// process is in an unrecoverable state; the crashtracker cannot report the +/// secondary fault and the caller's own signal handler (if any) will execute +/// in a potentially corrupted context. Callers should treat this function as a +/// terminal operation and exit shortly after it returns. +/// +/// # Safety +/// Crash-tracking functions are not reentrant. +/// No other crash-handler functions should be called concurrently. +/// The `runtime_stack` handle must be valid and will be consumed. +pub unsafe extern "C" fn ddog_crasht_report_unhandled_exception( + error_type: CharSlice, + error_message: CharSlice, + mut runtime_stack: *mut Handle<StackTrace>, +) -> VoidResult { + wrap_with_void_ffi_result!({ + let error_type_opt = error_type.try_to_string_option()?; + let error_message_opt = error_message.try_to_string_option()?; + let stack = *runtime_stack.take()?; + + libdd_crashtracker::report_unhandled_exception( + error_type_opt.as_deref(), + error_message_opt.as_deref(), + stack, + )?; + }) +} diff --git a/libdd-crashtracker/src/collector/collector_manager.rs b/libdd-crashtracker/src/collector/collector_manager.rs index 9abbe9d652..3a3f134ea7 100644 --- a/libdd-crashtracker/src/collector/collector_manager.rs +++ b/libdd-crashtracker/src/collector/collector_manager.rs @@ -5,7 +5,7 @@ use super::process_handle::ProcessHandle; use super::receiver_manager::Receiver; use libdd_common::timeout::TimeoutManager; -use super::emitters::emit_crashreport; +use super::emitters::{emit_crashreport, CrashKindData}; use 
crate::shared::configuration::CrashtrackerConfiguration; use libc::{siginfo_t, ucontext_t}; use libdd_common::unix_utils::{alt_fork, terminate}; @@ -118,8 +118,7 @@ pub(crate) fn run_collector_child( config_str, metadata_str, message_ptr, - sig_info, - ucontext, + CrashKindData::UnixSignal { sig_info, ucontext }, ppid, crashing_tid, ); diff --git a/libdd-crashtracker/src/collector/crash_handler.rs b/libdd-crashtracker/src/collector/crash_handler.rs index 917329295a..cc53eefc7b 100644 --- a/libdd-crashtracker/src/collector/crash_handler.rs +++ b/libdd-crashtracker/src/collector/crash_handler.rs @@ -8,8 +8,12 @@ use super::receiver_manager::Receiver; use super::signal_handler_manager::chain_signal_handler; use crate::crash_info::Metadata; use crate::shared::configuration::CrashtrackerConfiguration; +use crate::StackTrace; use libc::{c_void, siginfo_t, ucontext_t}; use libdd_common::timeout::TimeoutManager; +use std::os::fd::OwnedFd; +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::os::unix::net::UnixStream; use std::panic; use std::panic::PanicHookInfo; use std::ptr; @@ -251,16 +255,17 @@ fn handle_posix_signal_impl( return Ok(()); } - // Leak config and metadata to avoid calling `drop` during a crash - // Note that these operations also replace the global states. When the one-time guard is - // passed, all global configuration and metadata becomes invalid. - let config_ptr = CONFIG.swap(ptr::null_mut(), SeqCst); + // Take config and metadata out of global storage. + // We borrow via raw pointer and intentionally leak (do not reconstruct the Box) to avoid + // calling `drop`, and therefore `free`, inside a signal handler, which is not + // async-signal-safe. Once the one-time guard is passed, this storage is never updated again. 
+ let config_ptr = take_config_ptr(); if config_ptr.is_null() { return Err(CrashHandlerError::NoConfig); } let (config, config_str) = unsafe { &*config_ptr }; - let metadata_ptr = METADATA.swap(ptr::null_mut(), SeqCst); + let metadata_ptr = take_metadata_ptr(); if metadata_ptr.is_null() { return Err(CrashHandlerError::NoMetadata); } @@ -273,16 +278,7 @@ fn handle_posix_signal_impl( let timeout_manager = TimeoutManager::new(config.timeout()); - // Optionally, create the receiver. This all hinges on whether or not the configuration has a - // non-null unix domain socket specified. If it doesn't, then we need to check the receiver - // configuration. If it does, then we just connect to the socket. - let unix_socket_path = config.unix_socket_path().as_deref().unwrap_or_default(); - - let receiver = if unix_socket_path.is_empty() { - Receiver::spawn_from_stored_config()? - } else { - Receiver::from_socket(unix_socket_path)? - }; + let receiver = Receiver::from_crashtracker_config(config)?; let collector = Collector::spawn( &receiver, @@ -301,9 +297,192 @@ fn handle_posix_signal_impl( Ok(()) } +/// Atomically swaps the metadata pointer to null and returns the old raw pointer. +/// Async-signal-safe (only performs an atomic swap). +/// +/// Callers are responsible for the returned memory: +/// - Signal handlers: borrow via `&*ptr` and intentionally leak (avoids signal-unsafe `free`). +fn take_metadata_ptr() -> *mut (crate::crash_info::Metadata, String) { + METADATA.swap(ptr::null_mut(), SeqCst) +} + +/// Atomically swaps the config pointer to null and returns the old raw pointer. +/// Async-signal-safe (only performs an atomic swap). +/// +/// Callers are responsible for the returned memory: +/// - Signal handlers: borrow via `&*ptr` and intentionally leak (avoids signal-unsafe `free`). 
+fn take_config_ptr() -> *mut ( + crate::shared::configuration::CrashtrackerConfiguration, + String, +) { + CONFIG.swap(ptr::null_mut(), SeqCst) +} + +/// Takes the current metadata out of global storage, leaving it unset. +/// The returned value is properly owned and will be dropped by the caller. +/// Do NOT call from a signal handler; use `take_metadata_ptr` instead. +fn take_metadata() -> Option<(crate::crash_info::Metadata, String)> { + let ptr = take_metadata_ptr(); + if ptr.is_null() { + None + } else { + // Safety: ptr was created by Box::into_raw in update_metadata + Some(*unsafe { Box::from_raw(ptr) }) + } +} + +/// Takes the current config out of global storage, leaving it unset. +/// The returned value is properly owned and will be dropped by the caller. +/// Do NOT call from a signal handler; use `take_config_ptr` instead. +fn take_config() -> Option<( + crate::shared::configuration::CrashtrackerConfiguration, + String, +)> { + let ptr = take_config_ptr(); + if ptr.is_null() { + None + } else { + // Safety: ptr was created by Box::into_raw in update_config + Some(*unsafe { Box::from_raw(ptr) }) + } +} + +/// This function is designed to be called when a program is at a terminal state +/// and the application wants to report an unhandled exception to the crashtracker. +/// If this crashes, then the application will also crash. Ensure that this API is +/// called when the application is at a terminal state and exit quickly after. +/// +/// This API handles reporting both the crash ping and the crash report for the +/// unhandled exception. +/// +/// Preconditions: +/// - The crashtracker must be started +/// - The stacktrace must be valid +/// +/// This function will spawn the receiver process and call an emit function to pipe over +/// the crash data. 
We don't use the collector process because we are not in a signal handler +/// Rather, we call emit_crashreport directly and pipe over data to the receiver +pub fn report_unhandled_exception( + exception_type: Option<&str>, + exception_message: Option<&str>, + stacktrace: StackTrace, +) -> Result<(), CrashHandlerError> { + // Although both report_unhandled_exception and handle_posix_signal_impl do similar things of + // 1. Getting config and metadata + // 2. Spawn receiver + // 3. Set timeout + // 4. Emit report + // 5. Finish logic + // It is not worth going out of the way to combine these because: + // 1. The signal handler borrows and leaks (async-signal-safe); unifying them would require a + // generic or trait just to paper over a deliberate constraint, making the split harder to + // see. + // 2. The emit + finish: completely different mechanisms (fork vs. direct IO, Collector vs. + // raw ProcessHandle). + // 3. TimeoutManager::new(config.timeout()); one line, not worth extracting. + + // Turn crashtracker off to prevent a recursive crash report emission + // We do not turn it back on because this function is not intended to be used as + // a recurring mechanism to report exceptions. We expect the application to exit + // after + disable(); + + let (config, config_str) = take_config().ok_or(CrashHandlerError::NoConfig)?; + let (_metadata, metadata_str) = take_metadata().ok_or(CrashHandlerError::NoMetadata)?; + + let receiver = Receiver::from_crashtracker_config(&config)?; + + let timeout_manager = TimeoutManager::new(config.timeout()); + + let pid = unsafe { libc::getpid() }; + let tid = libdd_common::threading::get_current_thread_id() as libc::pid_t; + + let error_type_str = exception_type.unwrap_or(""); + let error_message_str = exception_message.unwrap_or(""); + let message = format!( + "Process was terminated due to an unhandled exception of type '{error_type_str}'. 
\ + Message: {error_message_str}" + ); + + let message_ptr = Box::into_raw(Box::new(message)); + + // Duplicate the socket fd before handing it to UnixStream so we retain an fd to poll on after + // the write end is closed. OwnedFd is the scope guard: it closes poll_fd on any exit path. + // + // SAFETY: dup() returns a fresh fd; we are its sole owner. ProcessHandle only polls it + // (wait_for_pollhup) and has no Drop impl, so it never closes the fd. Closing it here + // after finish() returns is the first and only close + let poll_fd = unsafe { OwnedFd::from_raw_fd(libc::dup(receiver.handle.uds_fd)) }; + let receiver_pid = receiver.handle.pid; + + { + let mut unix_stream = unsafe { UnixStream::from_raw_fd(receiver.handle.uds_fd) }; + let _ = super::emitters::emit_crashreport( + &mut unix_stream, + &config, + &config_str, + &metadata_str, + message_ptr, + super::emitters::CrashKindData::UnhandledException { stacktrace }, + pid, + tid, + ); + // unix_stream is dropped here, closing the write end of the socket. + // This signals EOF to the receiver so it can finish writing the crash report. + } + + // Wait for the receiver to signal it is done (POLLHUP on the dup'd fd), then reap it. + // poll_fd is dropped at the end of this function, closing the fd. 
+ let finish_handle = + super::process_handle::ProcessHandle::new(poll_fd.as_raw_fd(), receiver_pid); + finish_handle.finish(&timeout_manager); + + Ok(()) +} #[cfg(test)] mod tests { use super::*; + use std::time::Duration; + + fn make_test_metadata() -> Metadata { + Metadata { + library_name: "test-lib".to_string(), + library_version: "1.0.0".to_string(), + family: "test-family".to_string(), + tags: vec![], + } + } + + fn make_test_config() -> CrashtrackerConfiguration { + CrashtrackerConfiguration::new( + vec![], // additional_files + false, // create_alt_stack + false, // use_alt_stack + None, // endpoint + crate::StacktraceCollection::Disabled, + vec![], // signals + Some(Duration::from_secs(1)), // timeout + None, // unix_socket_path + false, // demangle_names + ) + .unwrap() + } + + /// Clears METADATA global, properly freeing any existing Box + fn clear_metadata() { + let ptr = METADATA.swap(ptr::null_mut(), SeqCst); + if !ptr.is_null() { + unsafe { drop(Box::from_raw(ptr)) }; + } + } + + /// Clears CONFIG global, properly freeing any existing Box + fn clear_config() { + let ptr = CONFIG.swap(ptr::null_mut(), SeqCst); + if !ptr.is_null() { + unsafe { drop(Box::from_raw(ptr)) }; + } + } #[test] fn test_register_panic_hook() { @@ -448,4 +627,116 @@ mod tests { "Process panicked with message \"test \"quoted\" 'text'\"" ); } + + // take_metadata_ptr + + #[test] + fn test_take_metadata_ptr_returns_null_when_unset() { + clear_metadata(); + assert!(take_metadata_ptr().is_null()); + } + + #[test] + fn test_take_metadata_ptr_takes_value_and_leaves_null() { + clear_metadata(); + update_metadata(make_test_metadata()).unwrap(); + + let ptr = take_metadata_ptr(); + assert!(!ptr.is_null()); + + // Storage is now null; a second take returns null. + assert!(take_metadata_ptr().is_null()); + + // Reconstruct the Box to avoid a leak. 
+ unsafe { drop(Box::from_raw(ptr)) }; + } + + #[test] + fn test_take_metadata_ptr_preserves_data() { + clear_metadata(); + let metadata = make_test_metadata(); + update_metadata(metadata.clone()).unwrap(); + + let ptr = take_metadata_ptr(); + assert!(!ptr.is_null()); + + let (stored_metadata, stored_json) = unsafe { &*ptr }; + assert_eq!(stored_metadata.library_name, metadata.library_name); + assert_eq!(stored_metadata.library_version, metadata.library_version); + assert_eq!(stored_metadata.family, metadata.family); + // The serialised string must be valid non-empty JSON. + assert!(!stored_json.is_empty()); + assert!(serde_json::from_str::<serde_json::Value>(stored_json).is_ok()); + + unsafe { drop(Box::from_raw(ptr)) }; + } + + // take_config_ptr + + #[test] + fn test_take_config_ptr_returns_null_when_unset() { + clear_config(); + assert!(take_config_ptr().is_null()); + } + + #[test] + fn test_take_config_ptr_takes_value_and_leaves_null() { + clear_config(); + update_config(make_test_config()).unwrap(); + + let ptr = take_config_ptr(); + assert!(!ptr.is_null()); + + // Storage is now null; a second take returns null. + assert!(take_config_ptr().is_null()); + + unsafe { drop(Box::from_raw(ptr)) }; + } + + // take_metadata + + #[test] + fn test_take_metadata_returns_none_when_unset() { + clear_metadata(); + assert!(take_metadata().is_none()); + } + + #[test] + fn test_take_metadata_returns_value_and_leaves_none() { + clear_metadata(); + let metadata = make_test_metadata(); + update_metadata(metadata.clone()).unwrap(); + + let (taken_metadata, taken_json) = take_metadata().expect("should return Some"); + assert_eq!(taken_metadata.library_name, metadata.library_name); + assert_eq!(taken_metadata.library_version, metadata.library_version); + assert_eq!(taken_metadata.family, metadata.family); + assert!(!taken_json.is_empty()); + + // Second take: storage is empty. 
+ assert!(take_metadata().is_none()); + } + + // take_config + + #[test] + fn test_take_config_returns_none_when_unset() { + clear_config(); + assert!(take_config().is_none()); + } + + #[test] + fn test_take_config_returns_value_and_leaves_none() { + clear_config(); + let config = make_test_config(); + update_config(config.clone()).unwrap(); + + let (taken_config, taken_json) = take_config().expect("should return Some"); + assert_eq!(taken_config, config); + assert!(!taken_json.is_empty()); + assert!(serde_json::from_str::<serde_json::Value>(&taken_json).is_ok()); + + // Second take: storage is empty. + assert!(take_config().is_none()); + } } diff --git a/libdd-crashtracker/src/collector/emitters.rs b/libdd-crashtracker/src/collector/emitters.rs index 33de2e8ce7..9b4b09b85e 100644 --- a/libdd-crashtracker/src/collector/emitters.rs +++ b/libdd-crashtracker/src/collector/emitters.rs @@ -10,7 +10,8 @@ use crate::runtime_callback::{ }; use crate::shared::constants::*; use crate::{ - translate_si_code, CrashtrackerConfiguration, ErrorKind, SignalNames, StacktraceCollection, + translate_si_code, CrashtrackerConfiguration, ErrorKind, SignalNames, StackTrace, + StacktraceCollection, }; use backtrace::Frame; use libc::{siginfo_t, ucontext_t}; @@ -34,6 +35,8 @@ pub enum EmitterError { CounterError(#[from] crate::collector::counters::CounterError), #[error("Atomic set error: {0}")] AtomicSetError(#[from] crate::collector::atomic_set::AtomicSetError), + #[error("Serialization error: {0}")] + SerializationError(#[from] serde_json::Error), } /// Emit a stacktrace onto the given handle as formatted json. @@ -134,6 +137,30 @@ unsafe fn emit_backtrace_by_frames( Ok(()) } +/// Crash-kind-specific data passed to `emit_crashreport`. +/// +/// Each variant carries exactly the fields that are meaningful for that crash +/// origin. 
the shared fields (config, metadata, procinfo, …) remain as plain +/// function parameters +pub(crate) enum CrashKindData { + UnixSignal { + sig_info: *const siginfo_t, + ucontext: *const ucontext_t, + }, + UnhandledException { + stacktrace: StackTrace, + }, +} + +impl CrashKindData { + fn error_kind(&self) -> ErrorKind { + match self { + CrashKindData::UnixSignal { .. } => ErrorKind::UnixSignal, + CrashKindData::UnhandledException { .. } => ErrorKind::UnhandledException, + } + } +} + #[allow(clippy::too_many_arguments)] pub(crate) fn emit_crashreport( pipe: &mut impl Write, @@ -141,24 +168,30 @@ pub(crate) fn emit_crashreport( config_str: &str, metadata_string: &str, message_ptr: *mut String, - sig_info: *const siginfo_t, - ucontext: *const ucontext_t, + crash: CrashKindData, ppid: i32, crashing_tid: libc::pid_t, ) -> Result<(), EmitterError> { - // The following order is important in order to emit the crash ping: - // - receiver expects the config because the endpoint to emit to is there - // - then message if any - // - then siginfo if any - // - then the kind if any - // - then metadata + // Crash-ping + // The receiver dispatches the crash ping as soon as it sees the metadata + // section, so try to emit message, siginfo, and kind before it to make sure + // we have an enhanced crash ping message emit_config(pipe, config_str)?; emit_message(pipe, message_ptr)?; - emit_siginfo(pipe, sig_info)?; - emit_kind(pipe, &ErrorKind::UnixSignal)?; + + match &crash { + CrashKindData::UnixSignal { sig_info, .. } => { + emit_siginfo(pipe, *sig_info)?; + } + CrashKindData::UnhandledException { .. 
} => { + // Unhandled exceptions have no signal info + } + } + + emit_kind(pipe, &crash.error_kind())?; emit_metadata(pipe, metadata_string)?; - // after the metadata the ping should have been sent - emit_ucontext(pipe, ucontext)?; + + // Shared process context emit_procinfo(pipe, ppid, crashing_tid)?; emit_counters(pipe)?; emit_spans(pipe)?; @@ -168,24 +201,50 @@ pub(crate) fn emit_crashreport( #[cfg(target_os = "linux")] emit_proc_self_maps(pipe)?; - // Getting a backtrace on rust is not guaranteed to be signal safe - // https://github.com/rust-lang/backtrace-rs/issues/414 - // let current_backtrace = backtrace::Backtrace::new(); - // In fact, if we look into the code here, we see mallocs. - // https://doc.rust-lang.org/src/std/backtrace.rs.html#332 - // Do this last, so even if it crashes, we still get the other info. - if config.resolve_frames() != StacktraceCollection::Disabled { - let fault_ip = extract_ip(ucontext); - unsafe { emit_backtrace_by_frames(pipe, config.resolve_frames(), fault_ip)? }; - } - - if is_runtime_callback_registered() { - emit_runtime_stack(pipe)?; + // Stack trace emission + match crash { + CrashKindData::UnixSignal { ucontext, .. } => { + emit_ucontext(pipe, ucontext)?; + if config.resolve_frames() != StacktraceCollection::Disabled { + // SAFETY: Getting a backtrace on rust is not guaranteed to be signal safe + // https://github.com/rust-lang/backtrace-rs/issues/414 + // let current_backtrace = backtrace::Backtrace::new(); + // In fact, if we look into the code here, we see mallocs. + // https://doc.rust-lang.org/src/std/backtrace.rs.html#332 + // We do this last, so even if it crashes, we still get the other info. + let fault_ip = extract_ip(ucontext); + unsafe { emit_backtrace_by_frames(pipe, config.resolve_frames(), fault_ip)? 
}; + } + if is_runtime_callback_registered() { + emit_runtime_stack(pipe)?; + } + } + CrashKindData::UnhandledException { stacktrace } => { + // SAFETY: this branch only executes when an unhandled exception occurs + // and is not called from a signal handler. + unsafe { emit_whole_stacktrace(pipe, stacktrace)? }; + } } writeln!(pipe, "{DD_CRASHTRACK_DONE}")?; pipe.flush()?; + Ok(()) +} +/// SAFETY: +/// This function is not safe to call from a signal handler. +/// Although `serde_json::to_writer` does not technically allocate memory +/// itself, it takes in `StackTrace` which is allocated and is only intended +/// to be used in a non-signal-handler context +unsafe fn emit_whole_stacktrace( + w: &mut impl Write, + stacktrace: StackTrace, +) -> Result<(), EmitterError> { + writeln!(w, "{DD_CRASHTRACK_BEGIN_WHOLE_STACKTRACE}")?; + let _ = serde_json::to_writer(&mut *w, &stacktrace); + writeln!(w)?; + writeln!(w, "{DD_CRASHTRACK_END_WHOLE_STACKTRACE}")?; + w.flush()?; Ok(()) } @@ -425,6 +484,8 @@ fn extract_ip(ucontext: *const ucontext_t) -> usize { #[cfg(test)] mod tests { + use crate::StackFrame; + use super::*; use std::str; @@ -446,6 +507,38 @@ mod tests { buf } + #[test] + #[cfg_attr(miri, ignore)] + fn test_emit_complete_stacktrace() { + // new_incomplete() starts with incomplete: true, which push_frame requires + let mut stacktrace = StackTrace::new_incomplete(); + let mut stackframe1 = StackFrame::new(); + stackframe1.with_ip(1234); + stackframe1.with_function("test_function1".to_string()); + stackframe1.with_file("test_file1".to_string()); + + let mut stackframe2 = StackFrame::new(); + stackframe2.with_ip(5678); + stackframe2.with_function("test_function2".to_string()); + stackframe2.with_file("test_file2".to_string()); + + stacktrace.push_frame(stackframe1, true).unwrap(); + stacktrace.push_frame(stackframe2, true).unwrap(); + + stacktrace.set_complete().unwrap(); + + let mut buf = Vec::new(); + unsafe { emit_whole_stacktrace(&mut buf, stacktrace).expect("to 
/// A two-frame, completed stack trace must show up in the emitted output with each
/// frame's hex-formatted ip, function name and file name.
#[test]
#[cfg_attr(miri, ignore)]
fn test_emit_complete_stacktrace() {
    // Frames are pushed while the trace is still flagged incomplete, which is why it is
    // created with `new_incomplete()` and only marked complete afterwards.
    let mut trace = StackTrace::new_incomplete();
    for (ip, function, file) in [
        (1234, "test_function1", "test_file1"),
        (5678, "test_function2", "test_file2"),
    ] {
        let mut frame = StackFrame::new();
        frame.with_ip(ip);
        frame.with_function(function.to_string());
        frame.with_file(file.to_string());
        trace.push_frame(frame, true).unwrap();
    }
    trace.set_complete().unwrap();

    let mut sink = Vec::new();
    unsafe { emit_whole_stacktrace(&mut sink, trace).expect("to work ;-)") };
    let rendered = str::from_utf8(&sink).expect("to be valid UTF8");

    // 1234 == 0x4d2 and 5678 == 0x162e.
    for needle in [
        "\"ip\":\"0x4d2\"",
        "\"function\":\"test_function1\"",
        "\"file\":\"test_file1\"",
        "\"ip\":\"0x162e\"",
        "\"function\":\"test_function2\"",
        "\"file\":\"test_file2\"",
    ] {
        assert!(rendered.contains(needle));
    }
}
/// Picks how to obtain a receiver for the given crashtracker configuration.
///
/// A non-empty `unix_socket_path` is handed to `Self::from_socket`. An unset path is
/// treated exactly like an empty one: both fall back to
/// `Self::spawn_from_stored_config`.
pub(crate) fn from_crashtracker_config(
    config: &crate::shared::configuration::CrashtrackerConfiguration,
) -> Result {
    // `None` collapses to "" here, so "unset" and "empty" take the same branch below.
    let unix_socket_path = config.unix_socket_path().as_deref().unwrap_or_default();
    if unix_socket_path.is_empty() {
        Self::spawn_from_stored_config()
    } else {
        Self::from_socket(unix_socket_path)
    }
}
Python) @@ -170,6 +171,15 @@ fn process_line( StdinState::Counters } + StdinState::WholeStackTrace if line.starts_with(DD_CRASHTRACK_END_WHOLE_STACKTRACE) => { + StdinState::Waiting + } + StdinState::WholeStackTrace => { + let stacktrace: StackTrace = serde_json::from_str(line)?; + builder.with_stack(stacktrace)?; + StdinState::WholeStackTrace + } + StdinState::Done => { builder.with_log_message( format!("Unexpected line after crashreport is done: {line}"), @@ -349,6 +359,9 @@ fn process_line( StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_UCONTEXT) => { StdinState::Ucontext } + StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_WHOLE_STACKTRACE) => { + StdinState::WholeStackTrace + } StdinState::Waiting if line.starts_with(DD_CRASHTRACK_DONE) => { builder.with_incomplete(false)?; StdinState::Done diff --git a/libdd-crashtracker/src/shared/constants.rs b/libdd-crashtracker/src/shared/constants.rs index 207bf533b9..361e9f5ced 100644 --- a/libdd-crashtracker/src/shared/constants.rs +++ b/libdd-crashtracker/src/shared/constants.rs @@ -5,6 +5,7 @@ use std::time::Duration; pub const DD_CRASHTRACK_BEGIN_ADDITIONAL_TAGS: &str = "DD_CRASHTRACK_BEGIN_ADDITIONAL_TAGS"; pub const DD_CRASHTRACK_BEGIN_CONFIG: &str = "DD_CRASHTRACK_BEGIN_CONFIG"; +pub const DD_CRASHTRACK_BEGIN_WHOLE_STACKTRACE: &str = "DD_CRASHTRACK_BEGIN_WHOLE_STACKTRACE"; pub const DD_CRASHTRACK_BEGIN_COUNTERS: &str = "DD_CRASHTRACK_BEGIN_COUNTERS"; pub const DD_CRASHTRACK_BEGIN_FILE: &str = "DD_CRASHTRACK_BEGIN_FILE"; pub const DD_CRASHTRACK_BEGIN_KIND: &str = "DD_CRASHTRACK_BEGIN_KIND"; @@ -23,6 +24,7 @@ pub const DD_CRASHTRACK_BEGIN_MESSAGE: &str = "DD_CRASHTRACK_BEGIN_MESSAGE"; pub const DD_CRASHTRACK_DONE: &str = "DD_CRASHTRACK_DONE"; pub const DD_CRASHTRACK_END_ADDITIONAL_TAGS: &str = "DD_CRASHTRACK_END_ADDITIONAL_TAGS"; pub const DD_CRASHTRACK_END_CONFIG: &str = "DD_CRASHTRACK_END_CONFIG"; +pub const DD_CRASHTRACK_END_WHOLE_STACKTRACE: &str = "DD_CRASHTRACK_END_WHOLE_STACKTRACE"; 
/// Extra environment variables for a specific example, keyed by the example's name.
///
/// The runner applies these when it spawns the test executable, so examples that need
/// external resources (e.g. the receiver binary) can locate them without hard-coded
/// paths. Examples without special needs get an empty list.
fn per_test_env(name: &str, project_root: &Path) -> Vec<(String, String)> {
    if name != "crashtracking_unhandled_exception" {
        return Vec::new();
    }

    // Receiver binary and library directory as laid out under `<project_root>/<dir>`.
    let layout_for = |dir: &str| {
        let base = project_root.join(dir);
        (
            base.join("bin").join("libdatadog-crashtracking-receiver"),
            base.join("lib"),
        )
    };

    // The artifacts may live in "release/" (local/ffi_test build) or "artifacts/"
    // (CI pre-built). Take the first layout whose receiver binary exists; if neither
    // does, fall back to the "release" layout.
    let (receiver, lib_dir) = ["release", "artifacts"]
        .iter()
        .find_map(|&dir| {
            let candidate = layout_for(dir);
            candidate.0.exists().then_some(candidate)
        })
        .unwrap_or_else(|| layout_for("release"));

    // The C test binary is dynamically linked against libdatadog_profiling.{so,dylib},
    // which is not on the system library path. Point the platform's linker search path
    // at it so the binary can load; the C test forwards the variable via getenv() into
    // the receiver's explicit execve environment.
    //   Linux → LD_LIBRARY_PATH
    //   macOS → DYLD_LIBRARY_PATH
    let search_path_var = if cfg!(target_os = "macos") {
        "DYLD_LIBRARY_PATH"
    } else {
        "LD_LIBRARY_PATH"
    };

    // Our library directory goes first; a pre-existing non-empty value is kept after it.
    let mut lib_path = lib_dir.display().to_string();
    if let Ok(existing) = std::env::var(search_path_var) {
        if !existing.is_empty() {
            lib_path.push(':');
            lib_path.push_str(&existing);
        }
    }

    vec![
        (
            "DDOG_CRASHT_TEST_RECEIVER".to_string(),
            receiver.display().to_string(),
        ),
        (search_path_var.to_string(), lib_path),
    ]
}
result.print(); results.push(result); }