From c487840f75db1851cc8fd7822c10f762114dc56a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 5 Dec 2024 16:15:23 -0800 Subject: [PATCH 1/2] Remove main package dependency on criurpc Commit 7f64fb47 made the main package and runc/libcontainer's CriuOpts depend on criu/rpc. This is not good; among other things, it makes it complicated to make c/r optional. Let's switch CriuOpts.ManageCgroupsMode to a string (yes, it's an API breaking change) and move the cgroup mode string parsing to libcontainer. While at it, let's better document ManageCgroupsMode. Signed-off-by: Kir Kolyshkin --- checkpoint.go | 17 +---- libcontainer/criu_linux.go | 106 +++++++++++++++++++------------- libcontainer/criu_opts_linux.go | 9 ++- 3 files changed, 69 insertions(+), 63 deletions(-) diff --git a/checkpoint.go b/checkpoint.go index c1bcc703ca9..ffc26a5f313 100644 --- a/checkpoint.go +++ b/checkpoint.go @@ -8,7 +8,6 @@ import ( "path/filepath" "strconv" - criu "github.com/checkpoint-restore/go-criu/v6/rpc" "github.com/moby/sys/userns" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" @@ -132,6 +131,7 @@ func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) { StatusFd: context.Int("status-fd"), LsmProfile: context.String("lsm-profile"), LsmMountContext: context.String("lsm-mount-context"), + ManageCgroupsMode: context.String("manage-cgroups-mode"), } // CRIU options below may or may not be set. 
@@ -152,21 +152,6 @@ func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) { } } - switch context.String("manage-cgroups-mode") { - case "": - // do nothing - case "soft": - opts.ManageCgroupsMode = criu.CriuCgMode_SOFT - case "full": - opts.ManageCgroupsMode = criu.CriuCgMode_FULL - case "strict": - opts.ManageCgroupsMode = criu.CriuCgMode_STRICT - case "ignore": - opts.ManageCgroupsMode = criu.CriuCgMode_IGNORE - default: - return nil, errors.New("Invalid manage-cgroups-mode value") - } - // runc doesn't manage network devices and their configuration. nsmask := unix.CLONE_NEWNET diff --git a/libcontainer/criu_linux.go b/libcontainer/criu_linux.go index fed34e79148..65bd08ea1ed 100644 --- a/libcontainer/criu_linux.go +++ b/libcontainer/criu_linux.go @@ -295,6 +295,11 @@ func (c *Container) Checkpoint(criuOpts *CriuOpts) error { return errors.New("invalid directory to save checkpoint") } + cgMode, err := criuCgMode(criuOpts.ManageCgroupsMode) + if err != nil { + return err + } + // Since a container can be C/R'ed multiple times, // the checkpoint directory may already exist. 
if err := os.Mkdir(criuOpts.ImagesDirectory, 0o700); err != nil && !os.IsExist(err) { @@ -309,22 +314,23 @@ func (c *Container) Checkpoint(criuOpts *CriuOpts) error { defer imageDir.Close() rpcOpts := criurpc.CriuOpts{ - ImagesDirFd: proto.Int32(int32(imageDir.Fd())), - LogLevel: proto.Int32(4), - LogFile: proto.String(logFile), - Root: proto.String(c.config.Rootfs), - ManageCgroups: proto.Bool(true), - NotifyScripts: proto.Bool(true), - Pid: proto.Int32(int32(c.initProcess.pid())), - ShellJob: proto.Bool(criuOpts.ShellJob), - LeaveRunning: proto.Bool(criuOpts.LeaveRunning), - TcpEstablished: proto.Bool(criuOpts.TcpEstablished), - ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), - FileLocks: proto.Bool(criuOpts.FileLocks), - EmptyNs: proto.Uint32(criuOpts.EmptyNs), - OrphanPtsMaster: proto.Bool(true), - AutoDedup: proto.Bool(criuOpts.AutoDedup), - LazyPages: proto.Bool(criuOpts.LazyPages), + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + LogLevel: proto.Int32(4), + LogFile: proto.String(logFile), + Root: proto.String(c.config.Rootfs), + ManageCgroups: proto.Bool(true), // Obsoleted by ManageCgroupsMode. + ManageCgroupsMode: &cgMode, + NotifyScripts: proto.Bool(true), + Pid: proto.Int32(int32(c.initProcess.pid())), + ShellJob: proto.Bool(criuOpts.ShellJob), + LeaveRunning: proto.Bool(criuOpts.LeaveRunning), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + FileLocks: proto.Bool(criuOpts.FileLocks), + EmptyNs: proto.Uint32(criuOpts.EmptyNs), + OrphanPtsMaster: proto.Bool(true), + AutoDedup: proto.Bool(criuOpts.AutoDedup), + LazyPages: proto.Bool(criuOpts.LazyPages), } // if criuOpts.WorkDirectory is not set, criu default is used. 
@@ -381,12 +387,6 @@ func (c *Container) Checkpoint(criuOpts *CriuOpts) error { rpcOpts.TrackMem = proto.Bool(true) } - // append optional manage cgroups mode - if criuOpts.ManageCgroupsMode != 0 { - mode := criuOpts.ManageCgroupsMode - rpcOpts.ManageCgroupsMode = &mode - } - var t criurpc.CriuReqType if criuOpts.PreDump { feat := criurpc.CriuFeatures{ @@ -634,6 +634,12 @@ func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error { if criuOpts.ImagesDirectory == "" { return errors.New("invalid directory to restore checkpoint") } + + cgMode, err := criuCgMode(criuOpts.ManageCgroupsMode) + if err != nil { + return err + } + logDir := criuOpts.ImagesDirectory imageDir, err := os.Open(criuOpts.ImagesDirectory) if err != nil { @@ -663,22 +669,23 @@ func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error { req := &criurpc.CriuReq{ Type: &t, Opts: &criurpc.CriuOpts{ - ImagesDirFd: proto.Int32(int32(imageDir.Fd())), - EvasiveDevices: proto.Bool(true), - LogLevel: proto.Int32(4), - LogFile: proto.String(logFile), - RstSibling: proto.Bool(true), - Root: proto.String(root), - ManageCgroups: proto.Bool(true), - NotifyScripts: proto.Bool(true), - ShellJob: proto.Bool(criuOpts.ShellJob), - ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), - TcpEstablished: proto.Bool(criuOpts.TcpEstablished), - FileLocks: proto.Bool(criuOpts.FileLocks), - EmptyNs: proto.Uint32(criuOpts.EmptyNs), - OrphanPtsMaster: proto.Bool(true), - AutoDedup: proto.Bool(criuOpts.AutoDedup), - LazyPages: proto.Bool(criuOpts.LazyPages), + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + EvasiveDevices: proto.Bool(true), + LogLevel: proto.Int32(4), + LogFile: proto.String(logFile), + RstSibling: proto.Bool(true), + Root: proto.String(root), + ManageCgroups: proto.Bool(true), // Obsoleted by ManageCgroupsMode. 
+ ManageCgroupsMode: &cgMode, + NotifyScripts: proto.Bool(true), + ShellJob: proto.Bool(criuOpts.ShellJob), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + FileLocks: proto.Bool(criuOpts.FileLocks), + EmptyNs: proto.Uint32(criuOpts.EmptyNs), + OrphanPtsMaster: proto.Bool(true), + AutoDedup: proto.Bool(criuOpts.AutoDedup), + LazyPages: proto.Bool(criuOpts.LazyPages), }, } @@ -757,12 +764,6 @@ func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error { c.restoreNetwork(req, criuOpts) } - // append optional manage cgroups mode - if criuOpts.ManageCgroupsMode != 0 { - mode := criuOpts.ManageCgroupsMode - req.Opts.ManageCgroupsMode = &mode - } - var ( fds []string fdJSON []byte @@ -1184,3 +1185,20 @@ func (c *Container) criuNotifications(resp *criurpc.CriuResp, process *Process, } return nil } + +func criuCgMode(mode string) (criurpc.CriuCgMode, error) { + switch mode { + case "": + return criurpc.CriuCgMode_DEFAULT, nil + case "soft": + return criurpc.CriuCgMode_SOFT, nil + case "full": + return criurpc.CriuCgMode_FULL, nil + case "strict": + return criurpc.CriuCgMode_STRICT, nil + case "ignore": + return criurpc.CriuCgMode_IGNORE, nil + default: + return 0, errors.New("invalid manage-cgroups-mode value") + } +} diff --git a/libcontainer/criu_opts_linux.go b/libcontainer/criu_opts_linux.go index 6b0cfb82b12..f26df7d8da7 100644 --- a/libcontainer/criu_opts_linux.go +++ b/libcontainer/criu_opts_linux.go @@ -1,7 +1,5 @@ package libcontainer -import criu "github.com/checkpoint-restore/go-criu/v6/rpc" - type CriuPageServerInfo struct { Address string // IP address of CRIU page server Port int32 // port number of CRIU page server @@ -24,11 +22,16 @@ type CriuOpts struct { PreDump bool // call criu predump to perform iterative checkpoint PageServer CriuPageServerInfo // allow to dump to criu page server VethPairs []VethPairName // pass the veth to criu when restore - ManageCgroupsMode 
criu.CriuCgMode // dump or restore cgroup mode EmptyNs uint32 // don't c/r properties for namespace from this mask AutoDedup bool // auto deduplication for incremental dumps LazyPages bool // restore memory pages lazily using userfaultfd StatusFd int // fd for feedback when lazy server is ready LsmProfile string // LSM profile used to restore the container LsmMountContext string // LSM mount context value to use during restore + + // ManageCgroupsMode tells how criu should manage cgroups during + // checkpoint or restore. Possible values are: "soft", "full", + // "strict", "ignore", or "" (empty string) for criu default. + // See https://criu.org/CGroups for more details. + ManageCgroupsMode string } From 47dc185880dcae67daa0c8900682f8eed0200ccd Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 5 Dec 2024 16:32:18 -0800 Subject: [PATCH 2/2] Add runc_nocriu build tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows building a 17% smaller runc binary by not compiling in checkpoint/restore support. It turns out that the google.golang.org/protobuf package, used by go-criu, is quite big, and the Go linker can't drop unused stuff if reflection is used anywhere in the code. Currently there's no alternative to using protobuf in go-criu, and since not all users use c/r, let's provide them an option for a smaller binary. 
For reference, here are the top 10 biggest vendored packages, as reported by gsa[1]: $ gsa runc | grep vendor | head │ 8.59% │ google.golang.org/protobuf │ 1.3 MB │ vendor │ │ 5.76% │ github.com/opencontainers/runc │ 865 kB │ vendor │ │ 4.05% │ github.com/cilium/ebpf │ 608 kB │ vendor │ │ 2.86% │ github.com/godbus/dbus/v5 │ 429 kB │ vendor │ │ 1.25% │ github.com/urfave/cli │ 188 kB │ vendor │ │ 0.90% │ github.com/vishvananda/netlink │ 135 kB │ vendor │ │ 0.59% │ github.com/sirupsen/logrus │ 89 kB │ vendor │ │ 0.56% │ github.com/checkpoint-restore/go-criu/v6 │ 84 kB │ vendor │ │ 0.51% │ golang.org/x/sys │ 76 kB │ vendor │ │ 0.47% │ github.com/seccomp/libseccomp-golang │ 71 kB │ vendor │ And here is the total binary size saving when `runc_nocriu` is used. For non-stripped binaries: $ gsa runc-cr runc-nocr | tail -3 │ -17.04% │ runc-cr │ 15 MB │ 12 MB │ -2.6 MB │ │ │ runc-nocr │ │ │ │ └─────────┴──────────────────────────────────────────┴──────────┴──────────┴─────────┘ And for stripped binaries: │ -17.01% │ runc-cr-stripped │ 11 MB │ 8.8 MB │ -1.8 MB │ │ │ runc-nocr-stripped │ │ │ │ └─────────┴──────────────────────────────────────────┴──────────┴──────────┴─────────┘ [1]: https://github.com/Zxilly/go-size-analyzer Signed-off-by: Kir Kolyshkin --- .github/workflows/validate.yml | 6 ++++++ README.md | 8 ++++++++ libcontainer/criu_disabled_linux.go | 15 +++++++++++++++ libcontainer/criu_linux.go | 2 ++ 4 files changed, 31 insertions(+) create mode 100644 libcontainer/criu_disabled_linux.go diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index cfdf4fb8725..3b565f05215 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -76,8 +76,14 @@ jobs: uses: actions/setup-go@v5 with: go-version: "${{ env.GO_VERSION }}" + - name: install deps + run: | + sudo apt update + sudo apt -y install libseccomp-dev - name: compile with no build tags run: make BUILDTAGS="" + - name: compile with runc_nocriu build tag + run: make 
EXTRA_BUILDTAGS="runc_nocriu" codespell: runs-on: ubuntu-24.04 diff --git a/README.md b/README.md index 50fcd4e9222..5b1ac15d55e 100644 --- a/README.md +++ b/README.md @@ -103,9 +103,17 @@ e.g. to disable seccomp: make BUILDTAGS="" ``` +To add some more build tags to the default set, use the `EXTRA_BUILDTAGS` +make variable, e.g. to disable checkpoint/restore: + +```bash +make EXTRA_BUILDTAGS="runc_nocriu" +``` + | Build Tag | Feature | Enabled by Default | Dependencies | |---------------|---------------------------------------|--------------------|---------------------| | `seccomp` | Syscall filtering using `libseccomp`. | yes | `libseccomp` | +| `runc_nocriu` | **Disables** runc checkpoint/restore. | no | `criu` | The following build tags were used earlier, but are now obsoleted: - **runc_nodmz** (since runc v1.2.1 runc dmz binary is dropped) diff --git a/libcontainer/criu_disabled_linux.go b/libcontainer/criu_disabled_linux.go new file mode 100644 index 00000000000..28c4ad1664d --- /dev/null +++ b/libcontainer/criu_disabled_linux.go @@ -0,0 +1,15 @@ +//go:build runc_nocriu + +package libcontainer + +import "errors" + +var ErrNoCR = errors.New("this runc binary has not been compiled with checkpoint/restore support enabled (runc_nocriu)") + +func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error { + return ErrNoCR +} + +func (c *Container) Checkpoint(criuOpts *CriuOpts) error { + return ErrNoCR +} diff --git a/libcontainer/criu_linux.go b/libcontainer/criu_linux.go index 65bd08ea1ed..a7651958a88 100644 --- a/libcontainer/criu_linux.go +++ b/libcontainer/criu_linux.go @@ -1,3 +1,5 @@ +//go:build !runc_nocriu + package libcontainer import (