Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"io"
"net"
"os"
"os/signal"
"path/filepath"
"runtime"
"runtime/debug"
Expand Down Expand Up @@ -41,6 +42,36 @@ type pid struct {
PidFirstChild int `json:"stage1_pid"`
}

func setupPreExecSignalExit() (chan bool, func()) {
if unix.Getpid() != 1 {
return nil, nil
}

// The Go runtime assumes the kernel will finish terminating _SigKill
// signals by calling dieFromSignal:
// https://github.com/golang/go/blob/c60392da8b6f18b2aa92db5d22c4963ec25ae0ad/src/runtime/signal_unix.go#L993
// But Linux suppresses the default action for PID 1. While this helper is
// still the container's PID 1, translate the affected signals to 128+signo
// instead of leaking Go's fallback exit status 2.
setupDone := make(chan bool, 1)
signals := make(chan os.Signal, 1)
signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGHUP)
go func() {
setupDone <- true
sig, ok := <-signals
if !ok {
return
}
if s, ok := sig.(unix.Signal); ok {
os.Exit(128 + int(s))
}
}()
return setupDone, func() {
signal.Stop(signals)
close(signals)
}
}

// network is an internal struct used to setup container networks.
type network struct {
configs.Network
Expand Down Expand Up @@ -103,6 +134,28 @@ type initConfig struct {

// Init is part of "runc init" implementation.
func Init() {
// Translate termination signals to conventional shell-style exit codes
// while PID 1 is still the Go-based runc init helper.
// NOTE: This setup must occur BEFORE `syncParentHooks`. Once the parent receives the
// sync notification, it may immediately enforce a strict cgroup PID limit (e.g., <5).
// If the signal handler isn't ready by then, any subsequent OS thread creation
// (even by the Go runtime) could exceed the limit and cause the kernel to kill PID 1.
preExecSignalSetupDone, preExecSignalCloser := setupPreExecSignalExit()
if preExecSignalCloser != nil {
// This defer is technically unnecessary today, since any error leads to os.Exit(255)
// and the process terminates immediately. However, we retain it as a safeguard:
// if the error handling is later refactored to return instead of calling os.Exit,
// resource cleanup won’t be accidentally omitted.
defer preExecSignalCloser()
}

// Wait for the signal handler goroutine to finish initialization. Without this,
// there's a race: `syncParentHooks` could notify the parent before the handler is
// ready, triggering PID enforcement while the runtime is still setting up threads.
if preExecSignalSetupDone != nil {
<-preExecSignalSetupDone
}

runtime.GOMAXPROCS(1)
runtime.LockOSThread()

Expand Down
167 changes: 167 additions & 0 deletions libcontainer/integration/preexec_signal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package integration

import (
"errors"
"os"
"path/filepath"
"syscall"
"testing"
"time"

"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"

"golang.org/x/sys/unix"
)

// Test to simulate problem we saw in https://github.com/kubernetes/kubernetes/issues/135713
func TestPreExecSignalMapping(t *testing.T) {
if testing.Short() {
return
}

testCases := []struct {
name string
sig unix.Signal
}{
{name: "SIGTERM", sig: unix.SIGTERM},
{name: "SIGINT", sig: unix.SIGINT},
{name: "SIGHUP", sig: unix.SIGHUP},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
container, process, hookRelease := newPreExecSignalProcess(t)
defer destroyContainer(container)

ok(t, container.Signal(tc.sig))
if got, want := waitForExitCodeWhileHookBlocked(t, process, hookRelease), 128+int(tc.sig); got != want {
t.Fatalf("expected %s to exit %d, got %d", tc.name, want, got)
}
})
}
}

func newPreExecSignalProcess(t testing.TB) (*libcontainer.Container, *libcontainer.Process, string) {
t.Helper()

config := newTemplateConfig(t, nil)
hookReady := filepath.Join(config.Rootfs, "startContainer-hook-ready")
hookRelease := filepath.Join(config.Rootfs, "startContainer-hook-release")
ok(t, unix.Mkfifo(hookRelease, 0o600))
config.Hooks = configs.Hooks{
configs.StartContainer: configs.HookList{
configs.NewCommandHook(&configs.Command{
Path: "/bin/sh",
Args: []string{"/bin/sh", "-c", "touch /startContainer-hook-ready && cat /startContainer-hook-release >/dev/null"},
}),
},
}

container, err := newContainer(t, config)
ok(t, err)

process := &libcontainer.Process{
Cwd: "/",
Args: []string{"false"},
Env: standardEnvironment,
Init: true,
}

ok(t, container.Run(process))
waitForFile(t, hookReady)
return container, process, hookRelease
}

func waitForFile(t testing.TB, path string) {
t.Helper()

deadline := time.Now().Add(5 * time.Second)
for time.Now().Before(deadline) {
if _, err := os.Stat(path); err == nil {
return
} else if !errors.Is(err, os.ErrNotExist) {
t.Fatalf("stat %s: %v", path, err)
}
time.Sleep(10 * time.Millisecond)
}

t.Fatalf("timed out waiting for %s", path)
}

func processExitCode(process *libcontainer.Process) (int, error) {
ps, err := process.Wait()
if err != nil && ps == nil {
return 0, err
}
if ps == nil {
return 0, errors.New("wait returned nil process state")
}

status, ok := ps.Sys().(syscall.WaitStatus)
if !ok {
return 0, errors.New("unexpected wait status type")
}
if !status.Exited() {
return 0, errors.New("process did not exit")
}
return status.ExitStatus(), nil
}

func waitForExitCodeWhileHookBlocked(t testing.TB, process *libcontainer.Process, hookRelease string) int {
t.Helper()

type waitResult struct {
code int
err error
}

results := make(chan waitResult, 1)
go func() {
code, err := processExitCode(process)
results <- waitResult{code: code, err: err}
}()

select {
case result := <-results:
releaseHook(t, hookRelease)
if result.err != nil {
t.Fatalf("wait: %v", result.err)
}
return result.code
case <-time.After(500 * time.Millisecond):
releaseHook(t, hookRelease)
t.Fatal("process did not exit while startContainer hook was still blocking")
return 0
}
Comment on lines +132 to +136
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 500ms timeout here (and the 500ms deadline used inside releaseHook) seems quite tight for an integration test and may cause flakes under CI load. Consider increasing these timeouts (e.g., a few seconds, consistent with other integration tests) while still asserting that the init exits before the hook is released.

Copilot uses AI. Check for mistakes.
}

func releaseHook(t testing.TB, path string) {
t.Helper()

fd, err := unix.Open(path, unix.O_WRONLY|unix.O_NONBLOCK|unix.O_CLOEXEC, 0)
if errors.Is(err, unix.ENXIO) || errors.Is(err, os.ErrNotExist) {
return
}
ok(t, err)
Comment on lines +142 to +146
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

releaseHook treats ENXIO from opening the FIFO as a no-op return. ENXIO can also occur if the hook hasn't opened the FIFO for reading yet (a race even after hookReady is created), which can leave the StartContainer hook blocked and make this test flaky/hang. Consider retrying on ENXIO until a short deadline (or change the hook command so the FIFO is opened before writing the ready file).

Suggested change
fd, err := unix.Open(path, unix.O_WRONLY|unix.O_NONBLOCK|unix.O_CLOEXEC, 0)
if errors.Is(err, unix.ENXIO) || errors.Is(err, os.ErrNotExist) {
return
}
ok(t, err)
openDeadline := time.Now().Add(500 * time.Millisecond)
var (
fd int
err error
)
for {
fd, err = unix.Open(path, unix.O_WRONLY|unix.O_NONBLOCK|unix.O_CLOEXEC, 0)
if errors.Is(err, os.ErrNotExist) {
// FIFO does not exist (any more) — nothing to release.
return
}
if errors.Is(err, unix.ENXIO) {
// Reader (hook) has not opened the FIFO yet; retry for a short time.
if time.Now().After(openDeadline) {
t.Fatalf("timed out waiting to open %s for writing: %v", path, err)
}
time.Sleep(10 * time.Millisecond)
continue
}
ok(t, err)
break
}

Copilot uses AI. Check for mistakes.
f := os.NewFile(uintptr(fd), path)
if _, err := f.Write([]byte("ok\n")); err != nil {
_ = f.Close()
t.Fatalf("write %s: %v", path, err)
}
ok(t, f.Close())

deadline := time.Now().Add(500 * time.Millisecond)
for time.Now().Before(deadline) {
fd, err := unix.Open(path, unix.O_WRONLY|unix.O_NONBLOCK|unix.O_CLOEXEC, 0)
if errors.Is(err, unix.ENXIO) || errors.Is(err, os.ErrNotExist) {
return
}
if err == nil {
_ = unix.Close(fd)
}
time.Sleep(10 * time.Millisecond)
}

t.Fatalf("timed out waiting for startContainer hook to exit")
}
Loading