Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add runc_nocriu build tag to opt out of c/r #4546

Merged
merged 2 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,14 @@ jobs:
uses: actions/setup-go@v5
with:
go-version: "${{ env.GO_VERSION }}"
- name: install deps
run: |
sudo apt update
sudo apt -y install libseccomp-dev
- name: compile with no build tags
run: make BUILDTAGS=""
- name: compile with runc_nocriu build tag
run: make EXTRA_BUILDTAGS="runc_nocriu"

codespell:
runs-on: ubuntu-24.04
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,17 @@ e.g. to disable seccomp:
make BUILDTAGS=""
```

To add some more build tags to the default set, use the `EXTRA_BUILDTAGS`
make variable, e.g. to disable checkpoint/restore:

```bash
make EXTRA_BUILDTAGS="runc_nocriu"
```

| Build Tag | Feature | Enabled by Default | Dependencies |
|---------------|---------------------------------------|--------------------|---------------------|
| `seccomp` | Syscall filtering using `libseccomp`. | yes | `libseccomp` |
| `runc_nocriu` | **Disables** runc checkpoint/restore. | no | `criu` |

The following build tags were used earlier, but are now obsolete:
- **runc_nodmz** (since runc v1.2.1, the runc dmz binary has been dropped)
Expand Down
17 changes: 1 addition & 16 deletions checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"path/filepath"
"strconv"

criu "github.com/checkpoint-restore/go-criu/v6/rpc"
"github.com/moby/sys/userns"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -132,6 +131,7 @@ func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) {
StatusFd: context.Int("status-fd"),
LsmProfile: context.String("lsm-profile"),
LsmMountContext: context.String("lsm-mount-context"),
ManageCgroupsMode: context.String("manage-cgroups-mode"),
}

// CRIU options below may or may not be set.
Expand All @@ -152,21 +152,6 @@ func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) {
}
}

switch context.String("manage-cgroups-mode") {
case "":
// do nothing
case "soft":
opts.ManageCgroupsMode = criu.CriuCgMode_SOFT
case "full":
opts.ManageCgroupsMode = criu.CriuCgMode_FULL
case "strict":
opts.ManageCgroupsMode = criu.CriuCgMode_STRICT
case "ignore":
opts.ManageCgroupsMode = criu.CriuCgMode_IGNORE
default:
return nil, errors.New("Invalid manage-cgroups-mode value")
}

// runc doesn't manage network devices and their configuration.
nsmask := unix.CLONE_NEWNET

Expand Down
15 changes: 15 additions & 0 deletions libcontainer/criu_disabled_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//go:build runc_nocriu

package libcontainer

import "errors"

// ErrNoCR is the error returned by Checkpoint and Restore in builds made
// with the runc_nocriu build tag, i.e. without checkpoint/restore support.
var ErrNoCR = errors.New("this runc binary has not been compiled with checkpoint/restore support enabled (runc_nocriu)")

// Restore is the stub used when building with the runc_nocriu tag.
// It ignores its arguments and always returns ErrNoCR.
func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error {
return ErrNoCR
}

// Checkpoint is the stub used when building with the runc_nocriu tag.
// It ignores its argument and always returns ErrNoCR.
func (c *Container) Checkpoint(criuOpts *CriuOpts) error {
return ErrNoCR
}
108 changes: 64 additions & 44 deletions libcontainer/criu_linux.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//go:build !runc_nocriu

package libcontainer

import (
Expand Down Expand Up @@ -295,6 +297,11 @@ func (c *Container) Checkpoint(criuOpts *CriuOpts) error {
return errors.New("invalid directory to save checkpoint")
}

cgMode, err := criuCgMode(criuOpts.ManageCgroupsMode)
if err != nil {
return err
}

// Since a container can be C/R'ed multiple times,
// the checkpoint directory may already exist.
if err := os.Mkdir(criuOpts.ImagesDirectory, 0o700); err != nil && !os.IsExist(err) {
Expand All @@ -309,22 +316,23 @@ func (c *Container) Checkpoint(criuOpts *CriuOpts) error {
defer imageDir.Close()

rpcOpts := criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
LogLevel: proto.Int32(4),
LogFile: proto.String(logFile),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
Pid: proto.Int32(int32(c.initProcess.pid())),
ShellJob: proto.Bool(criuOpts.ShellJob),
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
OrphanPtsMaster: proto.Bool(true),
AutoDedup: proto.Bool(criuOpts.AutoDedup),
LazyPages: proto.Bool(criuOpts.LazyPages),
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
LogLevel: proto.Int32(4),
LogFile: proto.String(logFile),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true), // Obsoleted by ManageCgroupsMode.
ManageCgroupsMode: &cgMode,
NotifyScripts: proto.Bool(true),
Pid: proto.Int32(int32(c.initProcess.pid())),
ShellJob: proto.Bool(criuOpts.ShellJob),
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
OrphanPtsMaster: proto.Bool(true),
AutoDedup: proto.Bool(criuOpts.AutoDedup),
LazyPages: proto.Bool(criuOpts.LazyPages),
}

// if criuOpts.WorkDirectory is not set, criu default is used.
Expand Down Expand Up @@ -381,12 +389,6 @@ func (c *Container) Checkpoint(criuOpts *CriuOpts) error {
rpcOpts.TrackMem = proto.Bool(true)
}

// append optional manage cgroups mode
if criuOpts.ManageCgroupsMode != 0 {
mode := criuOpts.ManageCgroupsMode
rpcOpts.ManageCgroupsMode = &mode
}

var t criurpc.CriuReqType
if criuOpts.PreDump {
feat := criurpc.CriuFeatures{
Expand Down Expand Up @@ -634,6 +636,12 @@ func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error {
if criuOpts.ImagesDirectory == "" {
return errors.New("invalid directory to restore checkpoint")
}

cgMode, err := criuCgMode(criuOpts.ManageCgroupsMode)
if err != nil {
return err
}

logDir := criuOpts.ImagesDirectory
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
Expand Down Expand Up @@ -663,22 +671,23 @@ func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error {
req := &criurpc.CriuReq{
Type: &t,
Opts: &criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
EvasiveDevices: proto.Bool(true),
LogLevel: proto.Int32(4),
LogFile: proto.String(logFile),
RstSibling: proto.Bool(true),
Root: proto.String(root),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
ShellJob: proto.Bool(criuOpts.ShellJob),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
OrphanPtsMaster: proto.Bool(true),
AutoDedup: proto.Bool(criuOpts.AutoDedup),
LazyPages: proto.Bool(criuOpts.LazyPages),
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
EvasiveDevices: proto.Bool(true),
LogLevel: proto.Int32(4),
LogFile: proto.String(logFile),
RstSibling: proto.Bool(true),
Root: proto.String(root),
ManageCgroups: proto.Bool(true), // Obsoleted by ManageCgroupsMode.
ManageCgroupsMode: &cgMode,
NotifyScripts: proto.Bool(true),
ShellJob: proto.Bool(criuOpts.ShellJob),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
FileLocks: proto.Bool(criuOpts.FileLocks),
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
OrphanPtsMaster: proto.Bool(true),
AutoDedup: proto.Bool(criuOpts.AutoDedup),
LazyPages: proto.Bool(criuOpts.LazyPages),
},
}

Expand Down Expand Up @@ -757,12 +766,6 @@ func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error {
c.restoreNetwork(req, criuOpts)
}

// append optional manage cgroups mode
if criuOpts.ManageCgroupsMode != 0 {
mode := criuOpts.ManageCgroupsMode
req.Opts.ManageCgroupsMode = &mode
}

var (
fds []string
fdJSON []byte
Expand Down Expand Up @@ -1184,3 +1187,20 @@ func (c *Container) criuNotifications(resp *criurpc.CriuResp, process *Process,
}
return nil
}

// criuCgMode translates a manage-cgroups-mode string into the matching
// criurpc.CriuCgMode value. An empty string selects CriuCgMode_DEFAULT;
// "soft", "full", "strict" and "ignore" select the mode of the same name.
// Any other input yields a zero mode and a non-nil error.
func criuCgMode(mode string) (criurpc.CriuCgMode, error) {
	var cgMode criurpc.CriuCgMode

	switch mode {
	case "":
		cgMode = criurpc.CriuCgMode_DEFAULT
	case "soft":
		cgMode = criurpc.CriuCgMode_SOFT
	case "full":
		cgMode = criurpc.CriuCgMode_FULL
	case "strict":
		cgMode = criurpc.CriuCgMode_STRICT
	case "ignore":
		cgMode = criurpc.CriuCgMode_IGNORE
	default:
		// Unknown mode string: reject it rather than guess.
		return 0, errors.New("invalid manage-cgroups-mode value")
	}

	return cgMode, nil
}
9 changes: 6 additions & 3 deletions libcontainer/criu_opts_linux.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package libcontainer
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to also move the CriuPageServerInfo stuff from checkpoint.go so we can mark this file as //go:build !runc_nocr as well? I guess it doesn't matter for the size of the final binary, but it is a little odd to keep this even with runc_nocr.

Actually, would it be possible to move criuOptions to a !runc_nocr file, or would that be too difficult?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did that initially, but then I realized that the runc checkpoint and runc restore commands should be kept intact, so that when a user runs them, they see a useful error like "c/r not compiled in" rather than No help topic for 'checkpoint' (which is the urfave/cli way of saying there is no such command).

Same for all the checkpoint/restore options: I kept those so that a valid command like runc checkpoint --leave-running XXX results in a useful "c/r not compiled in" error rather than "flag provided but not defined: -leave-running".

In order to keep both commands and their options, we have to keep the data structures, too. We're lucky there aren't that many.

Sure, we can duplicate the whole thing with commands and options but without data structures, but that would be a mere duplication.

So, for the sake of both simplicity and user experience I keep all the CLI parsing (and the associated data structures) in place. Hope it makes sense.


import criu "github.com/checkpoint-restore/go-criu/v6/rpc"

type CriuPageServerInfo struct {
Address string // IP address of CRIU page server
Port int32 // port number of CRIU page server
Expand All @@ -24,11 +22,16 @@ type CriuOpts struct {
PreDump bool // call criu predump to perform iterative checkpoint
PageServer CriuPageServerInfo // allow to dump to criu page server
VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode criu.CriuCgMode // dump or restore cgroup mode
EmptyNs uint32 // don't c/r properties for namespace from this mask
AutoDedup bool // auto deduplication for incremental dumps
LazyPages bool // restore memory pages lazily using userfaultfd
StatusFd int // fd for feedback when lazy server is ready
LsmProfile string // LSM profile used to restore the container
LsmMountContext string // LSM mount context value to use during restore

// ManageCgroupsMode tells how criu should manage cgroups during
// checkpoint or restore. Possible values are: "soft", "full",
// "strict", "ignore", or "" (empty string) for criu default.
// See https://criu.org/CGroups for more details.
ManageCgroupsMode string
}
Loading