diff --git a/bin_tests/tests/crashtracker_bin_test.rs b/bin_tests/tests/crashtracker_bin_test.rs index 1b6cef9c9f..0f308d9be0 100644 --- a/bin_tests/tests/crashtracker_bin_test.rs +++ b/bin_tests/tests/crashtracker_bin_test.rs @@ -125,6 +125,40 @@ fn test_crash_tracking_bin_runtime_callback_frame() { run_crash_test_with_artifacts(&config, &artifacts_map, &artifacts, validator).unwrap(); } +#[test] +#[cfg(target_os = "linux")] +#[cfg_attr(miri, ignore)] +fn test_crash_tracking_thread_name() { + let config = CrashTestConfig::new( + BuildProfile::Release, + TestMode::DoNothing, + CrashType::NullDeref, + ); + let artifacts = StandardArtifacts::new(config.profile); + let artifacts_map = build_artifacts(&artifacts.as_slice()).unwrap(); + + let validator: ValidatorFn = Box::new(|payload, _fixtures| { + let error = &payload["error"]; + let thread_name = error["thread_name"] + .as_str() + .expect("thread_name should be present"); + assert!( + !thread_name.trim().is_empty(), + "thread_name should not be empty: {thread_name:?}" + ); + assert!( + // Cutting `crashtracker_bin_test` to `crashtracker_bi` because Linux + // thread names are limited to 15 characters + thread_name.contains("crashtracker_bi"), + "thread_name should contain binary name: {thread_name:?}" + ); + + Ok(()) + }); + + run_crash_test_with_artifacts(&config, &artifacts_map, &artifacts, validator).unwrap(); +} + #[test] #[cfg_attr(miri, ignore)] fn test_crash_tracking_bin_runtime_callback_string() { @@ -1045,17 +1079,20 @@ fn assert_telemetry_message(crash_telemetry: &[u8], crash_typ: &str) { let tags = tags_raw .split(',') .filter(|t| !t.starts_with("uuid:")) + .map(|t| t.to_string()) .collect::>(); - let base_expected_tags: std::collections::HashSet<&str> = + let current_schema_version = libdd_crashtracker::CrashInfo::current_schema_version(); + + let base_expected_tags: std::collections::HashSet = std::collections::HashSet::from_iter([ - "data_schema_version:1.4", + 
format!("data_schema_version:{current_schema_version}"), // "incomplete:false", // TODO: re-add after fixing musl unwinding - "is_crash:true", - "profiler_collecting_sample:1", - "profiler_inactive:0", - "profiler_serializing:0", - "profiler_unwinding:0", + "is_crash:true".to_string(), + "profiler_collecting_sample:1".to_string(), + "profiler_inactive:0".to_string(), + "profiler_serializing:0".to_string(), + "profiler_unwinding:0".to_string(), ]); match crash_typ { diff --git a/docs/RFCs/0011-crashtracker-structured-log-format-V1_X.md b/docs/RFCs/0011-crashtracker-structured-log-format-V1_X.md index f709690436..d1f84cb851 100644 --- a/docs/RFCs/0011-crashtracker-structured-log-format-V1_X.md +++ b/docs/RFCs/0011-crashtracker-structured-log-format-V1_X.md @@ -78,6 +78,8 @@ Consumers of the crash data format SHOULD be designed to handle all versions fro - `stack`: **[required]** This represents the stack of the crashing thread. See below for more details on how stacktraces are formatted. + - `thread_name`: **[optional]** + Name of the crashing thread. - `files`: **[optional]** A `Map` where `contents` is an array of plain text strings, one per line. Useful files for triage and debugging, such as `/proc/self/maps` or `/proc/meminfo`. @@ -273,6 +275,15 @@ This section documents the evolution of the crashtracker structured log format a **Motivation:** When symbol names are demangled for readability, the original mangled names are lost. This makes debugging difficult when mangled names are needed (e.g., comparing against compiler-generated symbols). The `mangled_name` field preserves the original mangled name when demangling occurs. +### Version 1.5 +*Added `thread_name` to `ErrorData`* + +**Changes from v1.4:** +- Added `thread_name` field to `ErrorData` objects (optional string) +- Updated `data_schema_version` to "1.5" + +**Motivation:** Having access to the name of the crashing thread helps with debugging, especially in multithreaded programs. 
+ ## Appendix A: Example output An example crash report in version 1.0 format is [available here](artifacts/0005-crashtracker-example.json). @@ -281,7 +292,7 @@ Note: This example uses version 1.0 format. Version 1.1+ may include additional ## Appendix B: Json Schema -The current JSON schema (version 1.4) is [available here](artifacts/0009-crashtracker-schema.json). +The current JSON schema (version 1.5) is [available here](artifacts/crashtracker-unified-runtime-stack-schema-v1_5.json). Historical schemas are also available: - [Version 1.0 schema](artifacts/0005-crashtracker-schema.json) diff --git a/docs/RFCs/artifacts/crashtracker-unified-runtime-stack-schema-v1_5.json b/docs/RFCs/artifacts/crashtracker-unified-runtime-stack-schema-v1_5.json new file mode 100644 index 0000000000..c2a810f87d --- /dev/null +++ b/docs/RFCs/artifacts/crashtracker-unified-runtime-stack-schema-v1_5.json @@ -0,0 +1,583 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CrashInfo", + "type": "object", + "required": [ + "data_schema_version", + "error", + "incomplete", + "metadata", + "os_info", + "timestamp", + "uuid" + ], + "properties": { + "counters": { + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "data_schema_version": { + "type": "string" + }, + "error": { + "$ref": "#/definitions/ErrorData" + }, + "experimental": { + "anyOf": [ + { + "$ref": "#/definitions/Experimental" + }, + { + "type": "null" + } + ] + }, + "files": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "fingerprint": { + "type": [ + "string", + "null" + ] + }, + "incomplete": { + "type": "boolean" + }, + "log_messages": { + "type": "array", + "items": { + "type": "string" + } + }, + "metadata": { + "$ref": "#/definitions/Metadata" + }, + "os_info": { + "$ref": "#/definitions/OsInfo" + }, + "proc_info": { + "anyOf": [ + { + "$ref": "#/definitions/ProcInfo" + }, + { + "type": 
"null" + } + ] + }, + "sig_info": { + "anyOf": [ + { + "$ref": "#/definitions/SigInfo" + }, + { + "type": "null" + } + ] + }, + "span_ids": { + "type": "array", + "items": { + "$ref": "#/definitions/Span" + } + }, + "timestamp": { + "type": "string" + }, + "trace_ids": { + "type": "array", + "items": { + "$ref": "#/definitions/Span" + } + }, + "uuid": { + "type": "string" + } + }, + "definitions": { + "BuildIdType": { + "type": "string", + "enum": [ + "GNU", + "GO", + "PDB", + "SHA1" + ] + }, + "ErrorData": { + "type": "object", + "required": [ + "is_crash", + "kind", + "source_type", + "stack" + ], + "properties": { + "is_crash": { + "type": "boolean" + }, + "kind": { + "$ref": "#/definitions/ErrorKind" + }, + "message": { + "type": [ + "string", + "null" + ] + }, + "source_type": { + "$ref": "#/definitions/SourceType" + }, + "stack": { + "$ref": "#/definitions/StackTrace" + }, + "thread_name": { + "type": [ + "string", + "null" + ] + }, + "threads": { + "type": "array", + "items": { + "$ref": "#/definitions/ThreadData" + } + } + } + }, + "ErrorKind": { + "type": "string", + "enum": [ + "Panic", + "UnhandledException", + "UnixSignal" + ] + }, + "Experimental": { + "type": "object", + "properties": { + "additional_tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "runtime_stack": { + "anyOf": [ + { + "$ref": "#/definitions/RuntimeStack" + }, + { + "type": "null" + } + ] + }, + "ucontext": { + "type": [ + "string", + "null" + ] + } + } + }, + "FileType": { + "type": "string", + "enum": [ + "APK", + "ELF", + "PE" + ] + }, + "Metadata": { + "type": "object", + "required": [ + "family", + "library_name", + "library_version" + ], + "properties": { + "family": { + "type": "string" + }, + "library_name": { + "type": "string" + }, + "library_version": { + "type": "string" + }, + "tags": { + "description": "A list of \"key:value\" tuples.", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "OsInfo": { + "type": "object", + 
"required": [ + "architecture", + "bitness", + "os_type", + "version" + ], + "properties": { + "architecture": { + "type": "string" + }, + "bitness": { + "type": "string" + }, + "os_type": { + "type": "string" + }, + "version": { + "type": "string" + } + } + }, + "ProcInfo": { + "type": "object", + "required": [ + "pid" + ], + "properties": { + "pid": { + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "tid": { + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + } + } + }, + "RuntimeStack": { + "description": "Runtime stack representation for JSON serialization", + "type": "object", + "required": [ + "format" + ], + "properties": { + "format": { + "type": "string" + }, + "frames": { + "description": "Array of runtime-specific stack frames (optional, mutually exclusive with stacktrace_string)", + "type": "array", + "items": { + "$ref": "#/definitions/StackFrame" + } + }, + "stacktrace_string": { + "description": "Raw stacktrace string (optional, mutually exclusive with frames)", + "type": [ + "string", + "null" + ] + } + } + }, + "SiCodes": { + "description": "See https://man7.org/linux/man-pages/man2/sigaction.2.html MUST REMAIN IN SYNC WITH THE ENUM IN emit_sigcodes.c", + "type": "string", + "enum": [ + "BUS_ADRALN", + "BUS_ADRERR", + "BUS_MCEERR_AO", + "BUS_MCEERR_AR", + "BUS_OBJERR", + "ILL_BADSTK", + "ILL_COPROC", + "ILL_ILLADR", + "ILL_ILLOPC", + "ILL_ILLOPN", + "ILL_ILLTRP", + "ILL_PRVOPC", + "ILL_PRVREG", + "SEGV_ACCERR", + "SEGV_BNDERR", + "SEGV_MAPERR", + "SEGV_PKUERR", + "SI_ASYNCIO", + "SI_KERNEL", + "SI_MESGQ", + "SI_QUEUE", + "SI_SIGIO", + "SI_TIMER", + "SI_TKILL", + "SI_USER", + "SYS_SECCOMP", + "UNKNOWN" + ] + }, + "SigInfo": { + "type": "object", + "required": [ + "si_code", + "si_code_human_readable", + "si_signo", + "si_signo_human_readable" + ], + "properties": { + "si_addr": { + "type": [ + "string", + "null" + ] + }, + "si_code": { + "type": "integer", + "format": "int32" + }, + 
"si_code_human_readable": { + "$ref": "#/definitions/SiCodes" + }, + "si_signo": { + "type": "integer", + "format": "int32" + }, + "si_signo_human_readable": { + "$ref": "#/definitions/SignalNames" + } + } + }, + "SignalNames": { + "description": "See https://man7.org/linux/man-pages/man7/signal.7.html", + "type": "string", + "enum": [ + "SIGHUP", + "SIGINT", + "SIGQUIT", + "SIGILL", + "SIGTRAP", + "SIGABRT", + "SIGBUS", + "SIGFPE", + "SIGKILL", + "SIGUSR1", + "SIGSEGV", + "SIGUSR2", + "SIGPIPE", + "SIGALRM", + "SIGTERM", + "SIGCHLD", + "SIGCONT", + "SIGSTOP", + "SIGTSTP", + "SIGTTIN", + "SIGTTOU", + "SIGURG", + "SIGXCPU", + "SIGXFSZ", + "SIGVTALRM", + "SIGPROF", + "SIGWINCH", + "SIGIO", + "SIGSYS", + "SIGEMT", + "SIGINFO", + "UNKNOWN" + ] + }, + "SourceType": { + "type": "string", + "enum": [ + "Crashtracking" + ] + }, + "Span": { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string" + }, + "thread_name": { + "type": [ + "string", + "null" + ] + } + } + }, + "StackFrame": { + "type": "object", + "properties": { + "build_id": { + "type": [ + "string", + "null" + ] + }, + "build_id_type": { + "anyOf": [ + { + "$ref": "#/definitions/BuildIdType" + }, + { + "type": "null" + } + ] + }, + "column": { + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "comments": { + "type": "array", + "items": { + "type": "string" + } + }, + "file": { + "type": [ + "string", + "null" + ] + }, + "file_type": { + "anyOf": [ + { + "$ref": "#/definitions/FileType" + }, + { + "type": "null" + } + ] + }, + "function": { + "type": [ + "string", + "null" + ] + }, + "ip": { + "type": [ + "string", + "null" + ] + }, + "line": { + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "mangled_name": { + "type": [ + "string", + "null" + ] + }, + "module_base_address": { + "type": [ + "string", + "null" + ] + }, + "path": { + "type": [ + "string", + "null" + ] + }, + "relative_address": { + 
"type": [ + "string", + "null" + ] + }, + "sp": { + "type": [ + "string", + "null" + ] + }, + "symbol_address": { + "type": [ + "string", + "null" + ] + }, + "type_name": { + "type": [ + "string", + "null" + ] + } + } + }, + "StackTrace": { + "type": "object", + "required": [ + "format", + "frames", + "incomplete" + ], + "properties": { + "format": { + "type": "string" + }, + "frames": { + "type": "array", + "items": { + "$ref": "#/definitions/StackFrame" + } + }, + "incomplete": { + "type": "boolean" + } + } + }, + "ThreadData": { + "type": "object", + "required": [ + "crashed", + "name", + "stack" + ], + "properties": { + "crashed": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "stack": { + "$ref": "#/definitions/StackTrace" + }, + "state": { + "type": [ + "string", + "null" + ] + } + } + } + } +} \ No newline at end of file diff --git a/libdd-crashtracker-ffi/src/crash_info/builder.rs b/libdd-crashtracker-ffi/src/crash_info/builder.rs index fa07daa5a0..213ac433d9 100644 --- a/libdd-crashtracker-ffi/src/crash_info/builder.rs +++ b/libdd-crashtracker-ffi/src/crash_info/builder.rs @@ -403,6 +403,23 @@ pub unsafe extern "C" fn ddog_crasht_CrashInfoBuilder_with_message( }) } +/// # Safety +/// The `builder` can be null, but if non-null it must point to a Builder made by this module, +/// which has not previously been dropped. +/// The CharSlice must be valid. 
+#[no_mangle] +#[must_use] +#[named] +pub unsafe extern "C" fn ddog_crasht_CrashInfoBuilder_with_thread_name( + mut builder: *mut Handle, + thread_name: CharSlice, +) -> VoidResult { + wrap_with_void_ffi_result!({ + let thread_name = thread_name.try_to_string()?; + builder.to_inner_mut()?.with_thread_name(thread_name)?; + }) +} + //////////////////////////////////////////////////////////////////////////////////////////////////// // Crash Ping // //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/libdd-crashtracker-ffi/src/crash_info/proc_info.rs b/libdd-crashtracker-ffi/src/crash_info/proc_info.rs index 33f654d593..6c10e6bb79 100644 --- a/libdd-crashtracker-ffi/src/crash_info/proc_info.rs +++ b/libdd-crashtracker-ffi/src/crash_info/proc_info.rs @@ -4,11 +4,21 @@ #[repr(C)] pub struct ProcInfo { pid: u32, + /// Optional crashing thread id; 0 means unset. + tid: u32, } impl TryFrom for libdd_crashtracker::ProcInfo { type Error = anyhow::Error; fn try_from(value: ProcInfo) -> anyhow::Result { - Ok(Self { pid: value.pid }) + let tid = if value.tid == 0 { + None + } else { + Some(value.tid) + }; + Ok(Self { + pid: value.pid, + tid, + }) } } diff --git a/libdd-crashtracker/src/collector/collector_manager.rs b/libdd-crashtracker/src/collector/collector_manager.rs index a3e3fb1e65..9abbe9d652 100644 --- a/libdd-crashtracker/src/collector/collector_manager.rs +++ b/libdd-crashtracker/src/collector/collector_manager.rs @@ -38,6 +38,9 @@ impl Collector { // SAFETY: This function has no safety requirements. 
let pid = unsafe { libc::getpid() }; + // Get the current tid to identify thread info + let tid = current_tid(); + let fork_result = alt_fork(); match fork_result { 0 => { @@ -51,6 +54,7 @@ impl Collector { ucontext, receiver.handle.uds_fd, pid, + tid, ); } pid if pid > 0 => Ok(Self { @@ -68,6 +72,18 @@ impl Collector { } } +#[cfg(target_os = "linux")] +fn current_tid() -> libc::pid_t { + // SAFETY: `gettid` takes no arguments and always succeeds. The raw syscall is used because + // libc's `gettid` wrapper may not be exposed on older glibc versions. + unsafe { libc::syscall(libc::SYS_gettid) as libc::pid_t } +} + +#[cfg(not(target_os = "linux"))] +fn current_tid() -> libc::pid_t { + 0 +} + #[allow(clippy::too_many_arguments)] pub(crate) fn run_collector_child( config: &CrashtrackerConfiguration, @@ -78,6 +94,7 @@ pub(crate) fn run_collector_child( ucontext: *const ucontext_t, uds_fd: RawFd, ppid: libc::pid_t, + crashing_tid: libc::pid_t, ) -> ! { // Close stdio let _ = unsafe { libc::close(0) }; @@ -104,6 +121,7 @@ pub(crate) fn run_collector_child( sig_info, ucontext, ppid, + crashing_tid, ); if let Err(e) = report { eprintln!("Failed to flush crash report: {e}"); diff --git a/libdd-crashtracker/src/collector/emitters.rs b/libdd-crashtracker/src/collector/emitters.rs index 35e061d4e6..949cba1b0c 100644 --- a/libdd-crashtracker/src/collector/emitters.rs +++ b/libdd-crashtracker/src/collector/emitters.rs @@ -142,6 +142,7 @@ pub(crate) fn emit_crashreport( sig_info: *const siginfo_t, ucontext: *const ucontext_t, ppid: i32, + crashing_tid: libc::pid_t, ) -> Result<(), EmitterError> { // The following order is important in order to emit the crash ping: // - receiver expects the config @@ -154,7 +155,7 @@ pub(crate) fn emit_crashreport( emit_metadata(pipe, metadata_string)?; // after the metadata the ping should have been sent emit_ucontext(pipe, ucontext)?; - emit_procinfo(pipe, ppid)?; + emit_procinfo(pipe, ppid, crashing_tid)?; emit_counters(pipe)?; emit_spans(pipe)?; 
consume_and_emit_additional_tags(pipe)?; @@ -213,9 +214,9 @@ fn emit_message(w: &mut impl Write, message_ptr: *mut String) -> Result<(), Emit Ok(()) } -fn emit_procinfo(w: &mut impl Write, pid: i32) -> Result<(), EmitterError> { +fn emit_procinfo(w: &mut impl Write, pid: i32, tid: libc::pid_t) -> Result<(), EmitterError> { writeln!(w, "{DD_CRASHTRACK_BEGIN_PROCINFO}")?; - writeln!(w, "{{\"pid\": {pid} }}")?; + writeln!(w, "{{\"pid\": {pid}, \"tid\": {tid} }}")?; writeln!(w, "{DD_CRASHTRACK_END_PROCINFO}")?; w.flush()?; Ok(()) @@ -583,6 +584,23 @@ mod tests { unsafe { drop(Box::from_raw(message_ptr)) }; } + #[test] + #[cfg(target_os = "linux")] + #[cfg_attr(miri, ignore)] + fn test_emit_procinfo() { + let pid = unsafe { libc::getpid() }; + let tid = unsafe { libc::syscall(libc::SYS_gettid) as libc::pid_t }; + let mut buf = Vec::new(); + + emit_procinfo(&mut buf, pid, tid).expect("procinfo to emit"); + let proc_info_block = str::from_utf8(&buf).expect("to be valid UTF8"); + assert!(proc_info_block.contains(DD_CRASHTRACK_BEGIN_PROCINFO)); + assert!(proc_info_block.contains(DD_CRASHTRACK_END_PROCINFO)); + + assert!(proc_info_block.contains(&format!("\"pid\": {pid}"))); + assert!(proc_info_block.contains(&format!("\"tid\": {tid}"))); + } + #[test] #[cfg_attr(miri, ignore)] fn test_emit_message_very_long() { diff --git a/libdd-crashtracker/src/crash_info/builder.rs b/libdd-crashtracker/src/crash_info/builder.rs index c4dc262384..2cae463ff7 100644 --- a/libdd-crashtracker/src/crash_info/builder.rs +++ b/libdd-crashtracker/src/crash_info/builder.rs @@ -16,6 +16,7 @@ use super::*; pub struct ErrorDataBuilder { pub kind: Option, pub message: Option, + pub thread_name: Option, pub stack: Option, pub threads: Option>, } @@ -26,6 +27,7 @@ impl ErrorDataBuilder { let is_crash = true; let kind = self.kind.context("required field 'kind' missing")?; let message = self.message; + let thread_name = self.thread_name; let source_type = SourceType::Crashtracking; let stack = 
self.stack.unwrap_or_else(StackTrace::missing); let threads = self.threads.unwrap_or_default(); @@ -34,6 +36,7 @@ impl ErrorDataBuilder { is_crash, kind, message, + thread_name, source_type, stack, threads, @@ -56,6 +59,14 @@ impl ErrorDataBuilder { Ok(()) } + pub fn with_thread_name(&mut self, thread_name: String) -> anyhow::Result<()> { + if thread_name.trim().is_empty() { + return Ok(()); + } + self.thread_name = Some(thread_name); + Ok(()) + } + pub fn with_stack(&mut self, stack: StackTrace) -> anyhow::Result<()> { self.stack = Some(stack); Ok(()) @@ -292,6 +303,10 @@ impl CrashInfoBuilder { self.error.with_message(message) } + pub fn with_thread_name(&mut self, thread_name: String) -> anyhow::Result<()> { + self.error.with_thread_name(thread_name) + } + pub fn with_metadata(&mut self, metadata: Metadata) -> anyhow::Result<()> { self.metadata = Some(metadata); Ok(()) diff --git a/libdd-crashtracker/src/crash_info/cxx.rs b/libdd-crashtracker/src/crash_info/cxx.rs index 3690e27acd..34033f23f6 100644 --- a/libdd-crashtracker/src/crash_info/cxx.rs +++ b/libdd-crashtracker/src/crash_info/cxx.rs @@ -45,6 +45,7 @@ pub mod ffi { struct ProcInfo { pid: u32, + tid: u32, } struct OsInfo { @@ -177,7 +178,15 @@ impl CrashInfoBuilder { } pub fn set_proc_info(&mut self, proc_info: ffi::ProcInfo) -> anyhow::Result<()> { - let internal_proc_info = crate::ProcInfo { pid: proc_info.pid }; + let tid = if proc_info.tid == 0 { + None + } else { + Some(proc_info.tid) + }; + let internal_proc_info = crate::ProcInfo { + pid: proc_info.pid, + tid, + }; self.with_proc_info(internal_proc_info) } diff --git a/libdd-crashtracker/src/crash_info/error_data.rs b/libdd-crashtracker/src/crash_info/error_data.rs index 574fcff308..62ea4492a8 100644 --- a/libdd-crashtracker/src/crash_info/error_data.rs +++ b/libdd-crashtracker/src/crash_info/error_data.rs @@ -18,6 +18,8 @@ pub struct ErrorData { pub kind: ErrorKind, #[serde(default, skip_serializing_if = "Option::is_none")] pub message: Option, + 
#[serde(default, skip_serializing_if = "Option::is_none")] + pub thread_name: Option, pub source_type: SourceType, pub stack: StackTrace, #[serde(default, skip_serializing_if = "Vec::is_empty")] @@ -203,6 +205,7 @@ impl super::test_utils::TestInstance for ErrorData { is_crash: true, kind: ErrorKind::UnixSignal, message: None, + thread_name: None, source_type: SourceType::Crashtracking, stack: StackTrace::test_instance(seed), threads: vec![], diff --git a/libdd-crashtracker/src/crash_info/errors_intake.rs b/libdd-crashtracker/src/crash_info/errors_intake.rs index f2962af00f..12b901474e 100644 --- a/libdd-crashtracker/src/crash_info/errors_intake.rs +++ b/libdd-crashtracker/src/crash_info/errors_intake.rs @@ -626,7 +626,7 @@ mod tests { assert!(ddtags.contains("version:bar")); assert!(ddtags.contains("language_name:native")); - assert!(ddtags.contains("data_schema_version:1.4")); + assert!(ddtags.contains("data_schema_version:1.5")); assert!(ddtags.contains("incomplete:true")); assert!(ddtags.contains("is_crash:true")); assert!(ddtags.contains("uuid:1d6b97cb-968c-40c9-af6e-e4b4d71e8781")); @@ -678,7 +678,7 @@ mod tests { let payload = ErrorsIntakePayload::from_crash_info(&crash_info).unwrap(); let expected_crash_tags = [ - "data_schema_version:1.4", + "data_schema_version:1.5", "incomplete:true", "is_crash:true", "uuid:1d6b97cb-968c-40c9-af6e-e4b4d71e8781", diff --git a/libdd-crashtracker/src/crash_info/mod.rs b/libdd-crashtracker/src/crash_info/mod.rs index ffe3cf4b1d..7ed811f6cf 100644 --- a/libdd-crashtracker/src/crash_info/mod.rs +++ b/libdd-crashtracker/src/crash_info/mod.rs @@ -73,7 +73,7 @@ pub struct CrashInfo { impl CrashInfo { pub fn current_schema_version() -> String { - "1.4".to_string() + "1.5".to_string() } pub fn demangle_names(&mut self) -> anyhow::Result<()> { @@ -177,7 +177,7 @@ mod tests { fn test_schema_matches_rfc() { let rfc_schema_filename = concat!( env!("CARGO_MANIFEST_DIR"), - 
"/../docs/RFCs/artifacts/0011-crashtracker-unified-runtime-stack-schema.json" + "/../docs/RFCs/artifacts/crashtracker-unified-runtime-stack-schema-v1_5.json" ); let schema = schemars::schema_for!(CrashInfo); let schema_json = serde_json::to_string_pretty(&schema).expect("Schema to serialize"); diff --git a/libdd-crashtracker/src/crash_info/proc_info.rs b/libdd-crashtracker/src/crash_info/proc_info.rs index 35c2368e45..03ce43353a 100644 --- a/libdd-crashtracker/src/crash_info/proc_info.rs +++ b/libdd-crashtracker/src/crash_info/proc_info.rs @@ -6,11 +6,16 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct ProcInfo { pub pid: u32, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tid: Option, } #[cfg(test)] impl super::test_utils::TestInstance for ProcInfo { fn test_instance(seed: u64) -> Self { - Self { pid: seed as u32 } + Self { + pid: seed as u32, + tid: None, + } } } diff --git a/libdd-crashtracker/src/crash_info/telemetry.rs b/libdd-crashtracker/src/crash_info/telemetry.rs index 1b9dda6608..6a859726b4 100644 --- a/libdd-crashtracker/src/crash_info/telemetry.rs +++ b/libdd-crashtracker/src/crash_info/telemetry.rs @@ -512,7 +512,7 @@ mod tests { assert_eq!( HashSet::from_iter([ "collecting_sample:1", - "data_schema_version:1.4", + "data_schema_version:1.5", "incomplete:true", "is_crash:true", "not_profiling:0", diff --git a/libdd-crashtracker/src/receiver/receive_report.rs b/libdd-crashtracker/src/receiver/receive_report.rs index 65d5ff903f..8cf1557572 100644 --- a/libdd-crashtracker/src/receiver/receive_report.rs +++ b/libdd-crashtracker/src/receiver/receive_report.rs @@ -15,6 +15,7 @@ use libdd_telemetry::data::LogLevel; use serde::{Deserialize, Serialize}; use std::sync::Arc; use std::time::{Duration, Instant}; +use std::{fs, path::PathBuf}; use tokio::io::AsyncBufReadExt; #[derive(Debug)] @@ -116,6 +117,7 @@ pub(crate) enum StdinState { TraceIds, Ucontext, Waiting, + 
ThreadName(Option), // StackFrame is always emitted as one stream of all the frames but StackString // may have lines that we need to accumulate depending on runtime (e.g. Python) RuntimeStackFrame(Vec), @@ -269,6 +271,22 @@ fn process_line( StdinState::StackTrace } + StdinState::ThreadName(thread_name) if line.starts_with(DD_CRASHTRACK_END_THREAD_NAME) => { + if let Some(thread_name) = thread_name { + builder.with_thread_name(thread_name)?; + } else { + builder.with_log_message( + "Thread name block ended without content".to_string(), + true, + )?; + } + StdinState::Waiting + } + StdinState::ThreadName(_) => { + let name = line.trim_end_matches('\n').to_string(); + StdinState::ThreadName(Some(name)) + } + StdinState::TraceIds if line.starts_with(DD_CRASHTRACK_END_TRACE_IDS) => { StdinState::Waiting } @@ -317,6 +335,9 @@ fn process_line( StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_TRACE_IDS) => { StdinState::TraceIds } + StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_THREAD_NAME) => { + StdinState::ThreadName(None) + } StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_UCONTEXT) => { StdinState::Ucontext } @@ -467,6 +488,7 @@ pub(crate) async fn receive_report_from_stream( // For now, we only support Signal based crash detection in the receiver. builder.with_kind(ErrorKind::UnixSignal)?; + enrich_thread_name(&mut builder)?; builder.with_os_info_this_machine()?; // Without a config, we don't even know the endpoint to transmit to. Not much to do to recover. 
@@ -500,6 +522,35 @@ pub(crate) async fn receive_report_from_stream( Ok(Some((config, crash_info))) } +#[cfg(target_os = "linux")] +fn enrich_thread_name(builder: &mut CrashInfoBuilder) -> anyhow::Result<()> { + if builder.error.thread_name.is_some() { + return Ok(()); + } + let Some(proc_info) = builder.proc_info.as_ref() else { + return Ok(()); + }; + let Some(tid) = proc_info.tid else { + return Ok(()); + }; + let pid = proc_info.pid; + let path = PathBuf::from(format!("/proc/{pid}/task/{tid}/comm")); + let Ok(comm) = fs::read_to_string(&path) else { + return Ok(()); + }; + let thread_name = comm.trim_end_matches('\n'); + if thread_name.is_empty() { + return Ok(()); + } + builder.with_thread_name(thread_name.to_string())?; + Ok(()) +} + +#[cfg(not(target_os = "linux"))] +fn enrich_thread_name(_builder: &mut CrashInfoBuilder) -> anyhow::Result<()> { + Ok(()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/libdd-crashtracker/src/shared/constants.rs b/libdd-crashtracker/src/shared/constants.rs index 85d6029ec1..910bd5a018 100644 --- a/libdd-crashtracker/src/shared/constants.rs +++ b/libdd-crashtracker/src/shared/constants.rs @@ -9,6 +9,7 @@ pub const DD_CRASHTRACK_BEGIN_COUNTERS: &str = "DD_CRASHTRACK_BEGIN_COUNTERS"; pub const DD_CRASHTRACK_BEGIN_FILE: &str = "DD_CRASHTRACK_BEGIN_FILE"; pub const DD_CRASHTRACK_BEGIN_METADATA: &str = "DD_CRASHTRACK_BEGIN_METADATA"; pub const DD_CRASHTRACK_BEGIN_PROCINFO: &str = "DD_CRASHTRACK_BEGIN_PROCESSINFO"; +pub const DD_CRASHTRACK_BEGIN_THREAD_NAME: &str = "DD_CRASHTRACK_BEGIN_THREAD_NAME"; pub const DD_CRASHTRACK_BEGIN_RUNTIME_STACK_FRAME: &str = "DD_CRASHTRACK_BEGIN_RUNTIME_STACK_FRAME"; pub const DD_CRASHTRACK_BEGIN_RUNTIME_STACK_STRING: &str = "DD_CRASHTRACK_BEGIN_RUNTIME_STACK_STRING"; @@ -25,6 +26,7 @@ pub const DD_CRASHTRACK_END_COUNTERS: &str = "DD_CRASHTRACK_END_COUNTERS"; pub const DD_CRASHTRACK_END_FILE: &str = "DD_CRASHTRACK_END_FILE"; pub const DD_CRASHTRACK_END_METADATA: &str = 
"DD_CRASHTRACK_END_METADATA"; pub const DD_CRASHTRACK_END_PROCINFO: &str = "DD_CRASHTRACK_END_PROCESSINFO"; +pub const DD_CRASHTRACK_END_THREAD_NAME: &str = "DD_CRASHTRACK_END_THREAD_NAME"; pub const DD_CRASHTRACK_END_RUNTIME_STACK_FRAME: &str = "DD_CRASHTRACK_END_RUNTIME_STACK_FRAME"; pub const DD_CRASHTRACK_END_RUNTIME_STACK_STRING: &str = "DD_CRASHTRACK_END_RUNTIME_STACK_STRING"; pub const DD_CRASHTRACK_END_SIGINFO: &str = "DD_CRASHTRACK_END_SIGINFO";