Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try to use pidfd and epoll to wait init process exit #4517

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 21 additions & 25 deletions delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,12 @@ import (
"fmt"
"os"
"path/filepath"
"time"

"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/urfave/cli"

"golang.org/x/sys/unix"
)

func killContainer(container *libcontainer.Container) error {
_ = container.Signal(unix.SIGKILL)
for i := 0; i < 100; i++ {
time.Sleep(100 * time.Millisecond)
if err := container.Signal(unix.Signal(0)); err != nil {
return container.Destroy()
}
}
return errors.New("container init still running")
}

var deleteCommand = cli.Command{
Name: "delete",
Usage: "delete any resources held by the container often used with detached container",
Expand Down Expand Up @@ -65,25 +52,34 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
}
return err
}
// When --force is given, we kill all container processes and
// then destroy the container. This is done even for a stopped
// container, because (in case it does not have its own PID
// namespace) there may be some leftover processes in the
// container's cgroup.
if force {
return killContainer(container)
}
s, err := container.Status()
if err != nil {
return err
}
switch s {
case libcontainer.Stopped:
return container.Destroy()
// For a stopped container, because (in case it does not have
// its own PID namespace) there may be some leftover processes
// in the container's cgroup.
if !container.Config().Namespaces.IsPrivate(configs.NEWPID) {
if err := container.Kill(); err != nil {
return err
}
}
case libcontainer.Created:
return killContainer(container)
if err := container.Kill(); err != nil {
return err
}
default:
return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
if !force {
return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
}
// When --force is given, we kill all container processes and
// then destroy the container.
if err := container.Kill(); err != nil {
return err
}
}
return container.Destroy()
},
}
83 changes: 83 additions & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,89 @@ func (c *Container) signal(s os.Signal) error {
return nil
}

func (c *Container) killViaPidfd() error {
pidfd, err := unix.PidfdOpen(c.initProcess.pid(), 0)
if err != nil {
return err
}
defer unix.Close(pidfd)

epollfd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
if err != nil {
return err
}
defer unix.Close(epollfd)

event := unix.EpollEvent{
Events: unix.EPOLLIN,
Fd: int32(pidfd),
}
if err := unix.EpollCtl(epollfd, unix.EPOLL_CTL_ADD, pidfd, &event); err != nil {
return err
}

// We don't need unix.PidfdSendSignal because go runtime will use it if possible.
_ = c.Signal(unix.SIGKILL)

events := make([]unix.EpollEvent, 1)
for {
// Set the timeout to 10s, the same as in kill below.
n, err := unix.EpollWait(epollfd, events, 10000)
if err != nil {
if err == unix.EINTR {
continue
}
return err
}

if n == 0 {
return errors.New("container init still running")
}

if n > 0 {
event := events[0]
if event.Fd == int32(pidfd) {
return nil
}
}
}
}

func (c *Container) kill() error {
_ = c.Signal(unix.SIGKILL)

// For containers running in a low load machine, we only need to wait about 1ms.
time.Sleep(time.Millisecond)
if err := c.Signal(unix.Signal(0)); err != nil {
return nil
}

// For some containers in a heavy load machine, we need to wait more time.
logrus.Debugln("We need more time to wait the init process exit.")
for i := 0; i < 100; i++ {
time.Sleep(100 * time.Millisecond)
if err := c.Signal(unix.Signal(0)); err != nil {
return nil
}
}
return errors.New("container init still running")
}

// Kill kills the container and waits for the init process to exit.
func (c *Container) Kill() error {
// When a container doesn't have a private pidns, we have to kill all processes
// in the cgroup, it's more simpler to use `cgroup.kill` or `unix.Kill`.
if c.config.Namespaces.IsPrivate(configs.NEWPID) {
kolyshkin marked this conversation as resolved.
Show resolved Hide resolved
err := c.killViaPidfd()
if err == nil {
return nil
}

logrus.Debugf("pidfd & epoll failed, falling back to unix.Signal: %v", err)
}
return c.kill()
}

func (c *Container) createExecFifo() (retErr error) {
rootuid, err := c.Config().HostRootUID()
if err != nil {
Expand Down
12 changes: 2 additions & 10 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,7 @@ func (p *setnsProcess) startTime() (uint64, error) {
}

func (p *setnsProcess) signal(sig os.Signal) error {
s, ok := sig.(unix.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return unix.Kill(p.pid(), s)
return p.cmd.Process.Signal(sig)
}

func (p *setnsProcess) start() (retErr error) {
Expand Down Expand Up @@ -838,11 +834,7 @@ func (p *initProcess) createNetworkInterfaces() error {
}

func (p *initProcess) signal(sig os.Signal) error {
s, ok := sig.(unix.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return unix.Kill(p.pid(), s)
return p.cmd.Process.Signal(sig)
}

func (p *initProcess) setExternalDescriptors(newFds []string) {
Expand Down
Loading