Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 30 additions & 17 deletions crashtracker/src/collector/crash_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ use super::emitters::emit_crashreport;
use super::saguard::SaGuard;
use crate::crash_info::CrashtrackerMetadata;
use crate::shared::configuration::{CrashtrackerConfiguration, CrashtrackerReceiverConfig};
use crate::shared::constants::*;
use anyhow::Context;
use libc::{
c_void, execve, mmap, sigaltstack, siginfo_t, MAP_ANON, MAP_FAILED, MAP_PRIVATE, PROT_NONE,
PROT_READ, PROT_WRITE, SIGSTKSZ,
};
use nix::poll::{poll, PollFd, PollFlags};
use libc::{poll, pollfd, POLLHUP};
use nix::sys::signal::{self, SaFlags, SigAction, SigHandler, SigSet};
use nix::sys::socket;
use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
Expand All @@ -22,7 +23,7 @@ use std::ffi::CString;
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::os::unix::{
io::{BorrowedFd, FromRawFd, IntoRawFd, RawFd},
io::{FromRawFd, IntoRawFd, RawFd},
net::UnixStream,
};
use std::ptr;
Expand Down Expand Up @@ -228,20 +229,29 @@ fn run_receiver_child(uds_parent: RawFd, uds_child: RawFd, stderr: RawFd, stdout
}

fn wait_for_pollhup(target_fd: RawFd, timeout_ms: i32) -> anyhow::Result<bool> {
// Need to convert the RawFd into a BorrowedFd to satisfy the PollFd prototype
let target_fd = unsafe { BorrowedFd::borrow_raw(target_fd) };
let poll_fd = PollFd::new(&target_fd, PollFlags::POLLHUP);

match poll(&mut [poll_fd], timeout_ms)? {
-1 => Err(anyhow::anyhow!("poll failed")),
0 => Ok(false),
_ => match poll_fd
.revents()
.ok_or_else(|| anyhow::anyhow!("No revents found"))?
{
revents if revents.contains(PollFlags::POLLHUP) => Ok(true),
_ => Err(anyhow::anyhow!("poll returned unexpected result")),
},
let mut poll_fds = [pollfd {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment to explain the meaning of the boolean result

fd: target_fd,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 - BorrowedFd prefferably should be use in conjuction with OwnedFd. borrow_raw - without any guarantees of FD lifetime is problematic.

Probably the safest option would be to dup the fd - and own it within the context of this function.

Otherwise the code looks like correct but "C'ish" rust :)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#758 to track

events: POLLHUP,
revents: 0,
}];

match unsafe { poll(poll_fds.as_mut_ptr(), 1, timeout_ms) } {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: this should be .len not constant 1

-1 => Err(anyhow::anyhow!(
"poll failed with errno: {}",
std::io::Error::last_os_error()
)),
0 => Ok(false), // Timeout occurred
_ => {
let revents = poll_fds[0].revents;
if revents & POLLHUP != 0 {
Ok(true) // POLLHUP detected
} else {
Err(anyhow::anyhow!(
"poll returned unexpected result: revents = {}",
revents
))
}
}
}
}

Expand Down Expand Up @@ -484,7 +494,10 @@ fn receiver_finish(receiver: Receiver, start_time: Instant, timeout_ms: u32) {
}

let receiver_pid_as_pid = Pid::from_raw(receiver.receiver_pid);
let reaping_allowed_ms = timeout_ms.saturating_sub(start_time.elapsed().as_millis() as u32);
let reaping_allowed_ms = std::cmp::min(
timeout_ms.saturating_sub(start_time.elapsed().as_millis() as u32),
DD_CRASHTRACK_MINIMUM_REAP_TIME_MS,
);

let _ = reap_child_non_blocking(receiver_pid_as_pid, reaping_allowed_ms);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return Err(anyhow::anyhow!("Timeout waiting for child process to exit"));

In sidecar - we send kill and term. When the timeout ends.

And it looks that - we're not doing that here either way - so a non 0 timeout will only reduce the incidence of zombies. Not prevent them.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}
Expand Down
2 changes: 2 additions & 0 deletions crashtracker/src/shared/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ pub const DD_CRASHTRACK_END_SPAN_IDS: &str = "DD_CRASHTRACK_END_SPAN_IDS";
pub const DD_CRASHTRACK_END_STACKTRACE: &str = "DD_CRASHTRACK_END_STACKTRACE";
pub const DD_CRASHTRACK_END_TRACE_IDS: &str = "DD_CRASHTRACK_END_TRACE_IDS";
pub const DD_CRASHTRACK_DEFAULT_TIMEOUT_MS: u32 = 5_000;
pub const DD_CRASHTRACK_MINIMUM_REAP_TIME_MS: u32 = 160; // 4ms per sched slice, give ~4x10 slices
// for safety