From e64a48e66b0b0717b2260269425817f72f224842 Mon Sep 17 00:00:00 2001 From: Ayato Tokubi Date: Tue, 31 Mar 2026 02:16:35 +0000 Subject: [PATCH] Fix SIGCHLD race in signal handler setup Add an initial reap in forward() before entering the signal loop. Since signal.Notify runs in a goroutine, a fast-exiting process could trigger SIGCHLD before signal registration completes, causing the signal to be silently discarded. This left forward() blocking forever on a signal that would never arrive. By calling reap() once before the loop, we catch any process that already exited during the setup window. If the process is still running, the reap is a no-op and SIGCHLD arrives normally via the registered signal handler. Signed-off-by: Ayato Tokubi --- signals.go | 19 +++++++++++++++++++ tests/integration/exec.bats | 15 +++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/signals.go b/signals.go index 936d751f61f..60054b22b02 100644 --- a/signals.go +++ b/signals.go @@ -82,6 +82,25 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach // Perform the initial tty resize. Always ignore errors resizing because // stdout might have disappeared (due to races with when SIGHUP is sent). _ = tty.resize() + // If the process exited before signal.Notify was registered (which + // runs in a goroutine), SIGCHLD may have been silently discarded. + // Do an initial reap to catch this case, otherwise forward() would + // block forever waiting for a signal that will never arrive. + exits, err := h.reap() + if err != nil { + logrus.Error(err) + } + for _, e := range exits { + logrus.WithFields(logrus.Fields{ + "pid": e.pid, + "status": e.status, + }).Debug("process exited") + if e.pid == pid1 { + _, _ = process.Wait() + return e.status, nil + } + } + // Handle and forward signals. 
for s := range h.signals { switch s { diff --git a/tests/integration/exec.bats b/tests/integration/exec.bats index 11e4bb473a8..ca1391cbd61 100644 --- a/tests/integration/exec.bats +++ b/tests/integration/exec.bats @@ -374,6 +374,21 @@ EOF [[ ${lines[0]} = *"exec /run.sh: no such file or directory"* ]] } +# Regression test for a race condition where signal.Notify registration +# could complete after the exec process started. If the process exited +# quickly, SIGCHLD would be missed and runc exec would hang forever. +@test "runc exec [fast-exiting process does not hang]" { + runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox + [ "$status" -eq 0 ] + + for _ in $(seq 20); do + # Inside timeout, `runc` can't be resolved. Using RUNC_CMDLINE instead. + setup_runc_cmdline + run timeout --foreground 10 "${RUNC_CMDLINE[@]}" exec test_busybox true + [ "$status" -eq 0 ] + done +} + # https://github.com/opencontainers/runc/issues/4688 @test "runc exec check default home" { # --user can't work in rootless containers that don't have idmap.