diff --git a/signals.go b/signals.go
index 936d751f61f..60054b22b02 100644
--- a/signals.go
+++ b/signals.go
@@ -82,6 +82,25 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
 	// Perform the initial tty resize. Always ignore errors resizing because
 	// stdout might have disappeared (due to races with when SIGHUP is sent).
 	_ = tty.resize()
+	// If the process exited before signal.Notify was registered (which
+	// runs in a goroutine), SIGCHLD may have been silently discarded.
+	// Do an initial reap to catch this case, otherwise forward() would
+	// block forever waiting for a signal that will never arrive.
+	exits, err := h.reap()
+	if err != nil {
+		logrus.Error(err)
+	}
+	for _, e := range exits {
+		logrus.WithFields(logrus.Fields{
+			"pid":    e.pid,
+			"status": e.status,
+		}).Debug("process exited")
+		if e.pid == pid1 {
+			_, _ = process.Wait()
+			return e.status, nil
+		}
+	}
+
 	// Handle and forward signals.
 	for s := range h.signals {
 		switch s {
diff --git a/tests/integration/exec.bats b/tests/integration/exec.bats
index 11e4bb473a8..ca1391cbd61 100644
--- a/tests/integration/exec.bats
+++ b/tests/integration/exec.bats
@@ -374,6 +374,21 @@ EOF
 	[[ ${lines[0]} = *"exec /run.sh: no such file or directory"* ]]
 }
 
+# Regression test for a race condition where signal.Notify registration
+# could complete after the exec process started. If the process exited
+# quickly, SIGCHLD would be missed and runc exec would hang forever.
+@test "runc exec [fast-exiting process does not hang]" {
+	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
+	[ "$status" -eq 0 ]
+
+	for _ in $(seq 20); do
+		# Inside timeout, `runc` can't be resolved. Using RUNC_CMDLINE instead.
+		setup_runc_cmdline
+		run timeout --foreground 10 "${RUNC_CMDLINE[@]}" exec test_busybox true
+		[ "$status" -eq 0 ]
+	done
+}
+
 # https://github.com/opencontainers/runc/issues/4688
 @test "runc exec check default home" {
 	# --user can't work in rootless containers that don't have idmap.