From 353e91770b4b0c0e2e2b3df4a74174a739a48fa9 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 3 Apr 2020 19:19:39 -0700 Subject: [PATCH 01/13] cgroups/fs2: do not use securejoin In this very case, the code is writing to cgroup2 filesystem, and the file name is well known and can't possibly be a symlink. So, using securejoin is redundant. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs2/fs2.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index 4e0cb6fa680..8697e3c7203 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -8,7 +8,6 @@ import ( "path/filepath" "strings" - securejoin "github.com/cyphar/filepath-securejoin" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/pkg/errors" @@ -50,10 +49,7 @@ func detectControllers(dirPath string) (map[string]struct{}, error) { if err := os.MkdirAll(dirPath, 0755); err != nil { return nil, err } - controllersPath, err := securejoin.SecureJoin(dirPath, "cgroup.controllers") - if err != nil { - return nil, err - } + controllersPath := filepath.Join(dirPath, "cgroup.controllers") controllersData, err := ioutil.ReadFile(controllersPath) if err != nil { return nil, err From 480bca91be6361f89d22a2005438a5c14c01bd06 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sat, 4 Apr 2020 11:04:29 -0700 Subject: [PATCH 02/13] cgroups/fs2: move type decl to beginning It was weird having it somewhere in the middle. No code change, just moving it around. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs2/fs2.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index 8697e3c7203..4ab291b1889 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -13,6 +13,16 @@ import ( "github.com/pkg/errors" ) +type manager struct { + config *configs.Cgroup + // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" + dirPath string + // controllers is content of "cgroup.controllers" file. + // excludes pseudo-controllers ("devices" and "freezer"). + controllers map[string]struct{} + rootless bool +} + // NewManager creates a manager for cgroup v2 unified hierarchy. // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope". // If dirPath is empty, it is automatically set using config. @@ -62,16 +72,6 @@ func detectControllers(dirPath string) (map[string]struct{}, error) { return controllers, nil } -type manager struct { - config *configs.Cgroup - // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" - dirPath string - // controllers is content of "cgroup.controllers" file. - // excludes pseudo-controllers ("devices" and "freezer"). - controllers map[string]struct{} - rootless bool -} - func (m *manager) Apply(pid int) error { if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless { return err From 9c80cd672dd076ab3031da846670917e83886959 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sat, 4 Apr 2020 11:19:05 -0700 Subject: [PATCH 03/13] cgroupv2: rm legacy Paths from systemd driver Having map of per-subsystem paths in systemd unified cgroups driver does not make sense and makes the code less readable. To get rid of it, move the systemd v1-or-v2 init code to libcontainer/factory_linux.go which already has a function to deduce unified path out of paths map. End result is much cleaner code. Besides, we no longer write pid to the same cgroup file 7 times in Apply() like we did before. While at it - add `rootless` flag which is passed on to fs2 manager - merge getv2Path() into GetUnifiedPath(), don't overwrite path if it is set during initialization (on Load). Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/systemd/apply_systemd.go | 20 ----- .../cgroups/systemd/unified_hierarchy.go | 78 +++++++++---------- libcontainer/factory_linux.go | 39 +++++++--- 3 files changed, 65 insertions(+), 72 deletions(-) diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go index 4878448d81f..dca4870c894 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/apply_systemd.go @@ -102,26 +102,6 @@ func getDbusConnection() (*systemdDbus.Conn, error) { return connDbus, connErr } -func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) { - if !IsRunningSystemd() { - return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager") - } - if cgroups.IsCgroup2UnifiedMode() { - return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { - return &UnifiedManager{ - Cgroups: config, - Paths: paths, - } - }, nil - } - return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { - return &LegacyManager{ - Cgroups: config, - Paths: paths, - } - }, nil -} - func (m *LegacyManager) Apply(pid int) error { var ( c = m.Cgroups diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/unified_hierarchy.go index 36ccf2716d6..9e0d6db9c45 100644 --- a/libcontainer/cgroups/systemd/unified_hierarchy.go +++ b/libcontainer/cgroups/systemd/unified_hierarchy.go @@ -16,14 +16,23 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs2" "github.com/opencontainers/runc/libcontainer/configs" - "github.com/pkg/errors" "github.com/sirupsen/logrus" ) type UnifiedManager struct { mu sync.Mutex Cgroups *configs.Cgroup - Paths map[string]string + // path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" + path string + rootless bool +} + +func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) *UnifiedManager { + return &UnifiedManager{ + Cgroups: config, + path: path, + rootless: rootless, + } } func (m *UnifiedManager) Apply(pid int) error { @@ -35,12 +44,7 @@ func (m *UnifiedManager) Apply(pid int) error { ) if c.Paths != nil { - paths := make(map[string]string) - for name, path := range c.Paths { - paths[name] = path - } - m.Paths = paths - return cgroups.EnterPid(m.Paths, pid) + return cgroups.WriteCgroupProc(m.path, pid) } if c.Parent != "" { @@ -144,22 +148,13 @@ func (m *UnifiedManager) Apply(pid int) error { return err } - path, err := getv2Path(m.Cgroups) + _, err = m.GetUnifiedPath() if err != nil { return err } - if err := createCgroupsv2Path(path); err != nil { + if err := createCgroupsv2Path(m.path); err != nil { return err } - m.Paths = map[string]string{ - "pids": path, - "memory": path, - "io": path, - "cpu": path, - "devices": path, - "cpuset": path, - "freezer": path, - } return nil } @@ -175,39 +170,39 @@ func (m *UnifiedManager) Destroy() error { return err } dbusConnection.StopUnit(getUnitName(m.Cgroups), "replace", nil) - if err := cgroups.RemovePaths(m.Paths); err != nil { + + // XXX this is probably not needed, systemd should handle it + err = os.Remove(m.path) + if err != nil && !os.IsNotExist(err) { return err } - m.Paths = make(map[string]string) + return nil } +// this method is for v1 backward compatibility and will be removed func (m *UnifiedManager) GetPaths() map[string]string { - m.mu.Lock() - paths := m.Paths - m.mu.Unlock() + _, _ = m.GetUnifiedPath() + paths := map[string]string{ + "pids": m.path, + "memory": m.path, + "io": m.path, + "cpu": m.path, + "devices": m.path, + "cpuset": m.path, + "freezer": m.path, + } return paths } + func (m *UnifiedManager) GetUnifiedPath() (string, error) { - unifiedPath := "" m.mu.Lock() defer m.mu.Unlock() - for k, v := range m.Paths { - if unifiedPath == "" { - unifiedPath = v - } else if v != unifiedPath { - return unifiedPath, - errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v) - } - } - if unifiedPath == "" { - // FIXME: unified path could be detected even when no controller is available - return unifiedPath, errors.New("cannot detect unified path") + if m.path != "" { + return m.path, nil } - return unifiedPath, nil -} -func getv2Path(c *configs.Cgroup) (string, error) { + c := m.Cgroups slice := "system.slice" if c.Parent != "" { slice = c.Parent @@ -218,7 +213,8 @@ func getv2Path(c *configs.Cgroup) (string, error) { return "", err } - return filepath.Join(fs2.UnifiedMountpoint, slice, getUnitName(c)), nil + m.path = filepath.Join(fs2.UnifiedMountpoint, slice, getUnitName(c)) + return m.path, nil } func createCgroupsv2Path(path string) (Err error) { @@ -263,7 +259,7 @@ func (m *UnifiedManager) fsManager() (cgroups.Manager, error) { if err != nil { return nil, err } - return fs2.NewManager(m.Cgroups, path, false) + return fs2.NewManager(m.Cgroups, path, m.rootless) } func (m *UnifiedManager) Freeze(state configs.FreezerState) error { diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index a572c0f6a7b..fa446c03d6f 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -50,17 +50,6 @@ func InitArgs(args ...string) func(*LinuxFactory) error { } } -// SystemdCgroups is an options func to configure a LinuxFactory to return -// containers that use systemd to create and manage cgroups. -func SystemdCgroups(l *LinuxFactory) error { - systemdCgroupsManager, err := systemd.NewSystemdCgroupsManager() - if err != nil { - return err - } - l.NewCgroupsManager = systemdCgroupsManager - return nil -} - func getUnifiedPath(paths map[string]string) string { unifiedPath := "" for k, v := range paths { @@ -74,6 +63,34 @@ func getUnifiedPath(paths map[string]string) string { return unifiedPath } +func systemdCgroupV2(l *LinuxFactory, rootless bool) error { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return systemd.NewUnifiedManager(config, getUnifiedPath(paths), rootless) + } + return nil +} + +// SystemdCgroups is an options func to configure a LinuxFactory to return +// containers that use systemd to create and manage cgroups. +func SystemdCgroups(l *LinuxFactory) error { + if !systemd.IsRunningSystemd() { + return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager") + } + + if cgroups.IsCgroup2UnifiedMode() { + return systemdCgroupV2(l, false) + } + + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &systemd.LegacyManager{ + Cgroups: config, + Paths: paths, + } + } + + return nil +} + func cgroupfs2(l *LinuxFactory, rootless bool) error { l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { m, err := fs2.NewManager(config, getUnifiedPath(paths), rootless) From 88c13c0713d15cd065df6af9e803646cb23f303c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 5 Apr 2020 20:12:20 -0700 Subject: [PATCH 04/13] cgroupv2: use SecureJoin in systemd driver It seems that some paths are coming from user and are therefore untrusted. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/systemd/unified_hierarchy.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/unified_hierarchy.go index 9e0d6db9c45..540161afeec 100644 --- a/libcontainer/cgroups/systemd/unified_hierarchy.go +++ b/libcontainer/cgroups/systemd/unified_hierarchy.go @@ -13,6 +13,7 @@ import ( "time" systemdDbus "github.com/coreos/go-systemd/v22/dbus" + securejoin "github.com/cyphar/filepath-securejoin" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs2" "github.com/opencontainers/runc/libcontainer/configs" @@ -213,7 +214,13 @@ func (m *UnifiedManager) GetUnifiedPath() (string, error) { return "", err } - m.path = filepath.Join(fs2.UnifiedMountpoint, slice, getUnitName(c)) + path := filepath.Join(slice, getUnitName(c)) + path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path) + if err != nil { + return "", err + } + m.path = path + return m.path, nil } From dbeff8949175b38ff156c54606f19b17e3d7811a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 5 Apr 2020 19:06:25 -0700 Subject: [PATCH 05/13] cgroupv2/systemd: privatize UnifiedManager ... and its Cgroup field. There is no sense to keep it public. This was generated by gorename. Signed-off-by: Kir Kolyshkin --- .../cgroups/systemd/unified_hierarchy.go | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/unified_hierarchy.go index 540161afeec..9f4a4e5a56f 100644 --- a/libcontainer/cgroups/systemd/unified_hierarchy.go +++ b/libcontainer/cgroups/systemd/unified_hierarchy.go @@ -20,25 +20,25 @@ import ( "github.com/sirupsen/logrus" ) -type UnifiedManager struct { +type unifiedManager struct { mu sync.Mutex - Cgroups *configs.Cgroup + cgroups *configs.Cgroup // path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" path string rootless bool } -func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) *UnifiedManager { - return &UnifiedManager{ - Cgroups: config, +func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) *unifiedManager { + return &unifiedManager{ + cgroups: config, path: path, rootless: rootless, } } -func (m *UnifiedManager) Apply(pid int) error { +func (m *unifiedManager) Apply(pid int) error { var ( - c = m.Cgroups + c = m.cgroups unitName = getUnitName(c) slice = "system.slice" properties []systemdDbus.Property @@ -159,8 +159,8 @@ func (m *UnifiedManager) Apply(pid int) error { return nil } -func (m *UnifiedManager) Destroy() error { - if m.Cgroups.Paths != nil { +func (m *unifiedManager) Destroy() error { + if m.cgroups.Paths != nil { return nil } m.mu.Lock() @@ -170,7 +170,7 @@ func (m *UnifiedManager) Destroy() error { if err != nil { return err } - dbusConnection.StopUnit(getUnitName(m.Cgroups), "replace", nil) + dbusConnection.StopUnit(getUnitName(m.cgroups), "replace", nil) // XXX this is probably not needed, systemd should handle it err = os.Remove(m.path) @@ -182,7 +182,7 @@ func (m *UnifiedManager) Destroy() error { } // this method is for v1 backward compatibility and will be removed -func (m *UnifiedManager) GetPaths() map[string]string { +func (m *unifiedManager) GetPaths() map[string]string { _, _ = m.GetUnifiedPath() paths := map[string]string{ "pids": m.path, @@ -196,14 +196,14 @@ func (m *UnifiedManager) GetPaths() map[string]string { return paths } -func (m *UnifiedManager) GetUnifiedPath() (string, error) { +func (m *unifiedManager) GetUnifiedPath() (string, error) { m.mu.Lock() defer m.mu.Unlock() if m.path != "" { return m.path, nil } - c := m.Cgroups + c := m.cgroups slice := "system.slice" if c.Parent != "" { slice = c.Parent @@ -261,15 +261,15 @@ func createCgroupsv2Path(path string) (Err error) { return nil } -func (m *UnifiedManager) fsManager() (cgroups.Manager, error) { +func (m *unifiedManager) fsManager() (cgroups.Manager, error) { path, err := m.GetUnifiedPath() if err != nil { return nil, err } - return fs2.NewManager(m.Cgroups, path, m.rootless) + return fs2.NewManager(m.cgroups, path, m.rootless) } -func (m *UnifiedManager) Freeze(state configs.FreezerState) error { +func (m *unifiedManager) Freeze(state configs.FreezerState) error { fsMgr, err := m.fsManager() if err != nil { return err @@ -277,7 +277,7 @@ func (m *UnifiedManager) Freeze(state configs.FreezerState) error { return fsMgr.Freeze(state) } -func (m *UnifiedManager) GetPids() ([]int, error) { +func (m *unifiedManager) GetPids() ([]int, error) { path, err := m.GetUnifiedPath() if err != nil { return nil, err @@ -285,7 +285,7 @@ func (m *UnifiedManager) GetPids() ([]int, error) { return cgroups.GetPids(path) } -func (m *UnifiedManager) GetAllPids() ([]int, error) { +func (m *unifiedManager) GetAllPids() ([]int, error) { path, err := m.GetUnifiedPath() if err != nil { return nil, err @@ -293,7 +293,7 @@ func (m *UnifiedManager) GetAllPids() ([]int, error) { return cgroups.GetAllPids(path) } -func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) { +func (m *unifiedManager) GetStats() (*cgroups.Stats, error) { fsMgr, err := m.fsManager() if err != nil { return nil, err @@ -301,7 +301,7 @@ func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) { return fsMgr.GetStats() } -func (m *UnifiedManager) Set(container *configs.Config) error { +func (m *unifiedManager) Set(container *configs.Config) error { fsMgr, err := m.fsManager() if err != nil { return err @@ -309,6 +309,6 @@ func (m *UnifiedManager) Set(container *configs.Config) error { return fsMgr.Set(container) } -func (m *UnifiedManager) GetCgroups() (*configs.Cgroup, error) { - return m.Cgroups, nil +func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) { + return m.cgroups, nil } From 60eaed2ed61e71ad5ec521de88a29ac42fa1f7bb Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 5 Apr 2020 20:39:25 -0700 Subject: [PATCH 06/13] cgroupv2: move sanity path check to common code The fs2 cgroup driver has a sanity check for path. Since systemd driver is relying on the same path, it makes sense to move this check to the common code. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs2/fs2.go | 6 +----- libcontainer/factory_linux.go | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index 4ab291b1889..4f047bd5c52 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -30,11 +30,7 @@ func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups. if config == nil { config = &configs.Cgroup{} } - if dirPath != "" { - if filepath.Clean(dirPath) != dirPath || !filepath.IsAbs(dirPath) { - return nil, errors.Errorf("invalid dir path %q", dirPath) - } - } else { + if dirPath == "" { var err error dirPath, err = defaultDirPath(config) if err != nil { diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index fa446c03d6f..03c6ec45b31 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -51,16 +51,22 @@ func InitArgs(args ...string) func(*LinuxFactory) error { } func getUnifiedPath(paths map[string]string) string { - unifiedPath := "" + path := "" for k, v := range paths { - if unifiedPath == "" { - unifiedPath = v - } else if v != unifiedPath { - panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v)) + if path == "" { + path = v + } else if v != path { + panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, path, v)) } } // can be empty - return unifiedPath + if path != "" { + if filepath.Clean(path) != path || !filepath.IsAbs(path) { + panic(errors.Errorf("invalid dir path %q", path)) + } + } + + return path } func systemdCgroupV2(l *LinuxFactory, rootless bool) error { From 813cb3eb94659971c69c0dd19b2d5a9737891f15 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 5 Apr 2020 21:02:06 -0700 Subject: [PATCH 07/13] cgroupv2: fix fs2 cgroup init fs2 cgroup driver was not working because it did not enable controllers while creating cgroup directory; instead it was merely doing MkdirAll() and gathered the list of available controllers in NewManager(). Also, cgroup should be created in Apply(), not while creating a new manager instance. To fix: 1. Move the createCgroupsv2Path function from systemd driver to fs2 driver, renaming it to CreateCgroupPath. Use in Apply() from both fs2 and systemd drivers. 2. Delay available controllers map initialization to until it is needed. With this patch: - NewManager() only performs minimal initialization (initializin m.dirPath, if not provided); - Apply() properly creates cgroup path, enabling the controllers; - m.controllers is initialized lazily on demand. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs2/create.go | 49 +++++++++++++++++++ libcontainer/cgroups/fs2/fs2.go | 47 ++++++++++-------- .../cgroups/systemd/unified_hierarchy.go | 41 +--------------- 3 files changed, 77 insertions(+), 60 deletions(-) create mode 100644 libcontainer/cgroups/fs2/create.go diff --git a/libcontainer/cgroups/fs2/create.go b/libcontainer/cgroups/fs2/create.go new file mode 100644 index 00000000000..fb5e50507d2 --- /dev/null +++ b/libcontainer/cgroups/fs2/create.go @@ -0,0 +1,49 @@ +package fs2 + +import ( + "bytes" + "io/ioutil" + "os" + "path/filepath" + "strings" +) + +// CreateCgroupPath creates cgroupv2 path, enabling all the +// available controllers in the process. +func CreateCgroupPath(path string) (Err error) { + const file = UnifiedMountpoint + "/cgroup.controllers" + content, err := ioutil.ReadFile(file) + if err != nil { + return err + } + + ctrs := bytes.Fields(content) + res := append([]byte("+"), bytes.Join(ctrs, []byte(" +"))...) + + current := "/sys/fs" + elements := strings.Split(path, "/") + for i, e := range elements[3:] { + current = filepath.Join(current, e) + if i > 0 { + if err := os.Mkdir(current, 0755); err != nil { + if !os.IsExist(err) { + return err + } + } else { + // If the directory was created, be sure it is not left around on errors. + current := current + defer func() { + if Err != nil { + os.Remove(current) + } + }() + } + } + if i < len(elements[3:])-1 { + if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), res, 0755); err != nil { + return err + } + } + } + return nil +} diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index 4f047bd5c52..f32f647d805 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -37,38 +37,38 @@ func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups. return nil, err } } - controllers, err := detectControllers(dirPath) - if err != nil && !rootless { - return nil, err - } m := &manager{ - config: config, - dirPath: dirPath, - controllers: controllers, - rootless: rootless, + config: config, + dirPath: dirPath, + rootless: rootless, } return m, nil } -func detectControllers(dirPath string) (map[string]struct{}, error) { - if err := os.MkdirAll(dirPath, 0755); err != nil { - return nil, err +func (m *manager) getControllers() error { + if m.controllers != nil { + return nil } - controllersPath := filepath.Join(dirPath, "cgroup.controllers") - controllersData, err := ioutil.ReadFile(controllersPath) - if err != nil { - return nil, err + + file := filepath.Join(m.dirPath, "cgroup.controllers") + data, err := ioutil.ReadFile(file) + if err != nil && !m.rootless { + return err } - controllersFields := strings.Fields(string(controllersData)) - controllers := make(map[string]struct{}, len(controllersFields)) - for _, c := range controllersFields { - controllers[c] = struct{}{} + fields := strings.Fields(string(data)) + m.controllers = make(map[string]struct{}, len(fields)) + for _, c := range fields { + m.controllers[c] = struct{}{} } - return controllers, nil + + return nil } func (m *manager) Apply(pid int) error { + if err := CreateCgroupPath(m.dirPath); err != nil { + return err + } if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless { return err } @@ -89,6 +89,9 @@ func (m *manager) GetStats() (*cgroups.Stats, error) { ) st := cgroups.NewStats() + if err := m.getControllers(); err != nil { + return st, err + } // pids (since kernel 4.5) if _, ok := m.controllers["pids"]; ok { @@ -144,6 +147,7 @@ func (m *manager) Destroy() error { // GetPaths is for compatibility purpose and should be removed in future func (m *manager) GetPaths() map[string]string { + _ = m.getControllers() paths := map[string]string{ // pseudo-controller for compatibility "devices": m.dirPath, @@ -163,6 +167,9 @@ func (m *manager) Set(container *configs.Config) error { if container == nil || container.Cgroups == nil { return nil } + if err := m.getControllers(); err != nil { + return err + } var errs []error // pids (since kernel 4.5) if _, ok := m.controllers["pids"]; ok { diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/unified_hierarchy.go index 9f4a4e5a56f..18280371d19 100644 --- a/libcontainer/cgroups/systemd/unified_hierarchy.go +++ b/libcontainer/cgroups/systemd/unified_hierarchy.go @@ -3,8 +3,6 @@ package systemd import ( - "bytes" - "io/ioutil" "math" "os" "path/filepath" @@ -153,7 +151,7 @@ func (m *unifiedManager) Apply(pid int) error { if err != nil { return err } - if err := createCgroupsv2Path(m.path); err != nil { + if err := fs2.CreateCgroupPath(m.path); err != nil { return err } return nil @@ -224,43 +222,6 @@ func (m *unifiedManager) GetUnifiedPath() (string, error) { return m.path, nil } -func createCgroupsv2Path(path string) (Err error) { - content, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers") - if err != nil { - return err - } - - ctrs := bytes.Fields(content) - res := append([]byte("+"), bytes.Join(ctrs, []byte(" +"))...) - - current := "/sys/fs" - elements := strings.Split(path, "/") - for i, e := range elements[3:] { - current = filepath.Join(current, e) - if i > 0 { - if err := os.Mkdir(current, 0755); err != nil { - if !os.IsExist(err) { - return err - } - } else { - // If the directory was created, be sure it is not left around on errors. - current := current - defer func() { - if Err != nil { - os.Remove(current) - } - }() - } - } - if i < len(elements[3:])-1 { - if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), res, 0755); err != nil { - return err - } - } - } - return nil -} - func (m *unifiedManager) fsManager() (cgroups.Manager, error) { path, err := m.GetUnifiedPath() if err != nil { From b5c1949f2ab542e4e10060d8873a9d9de90ac836 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 7 Apr 2020 12:43:36 -0700 Subject: [PATCH 08/13] cgroups/fs2/CreateCgroupPath: reinstate check This check was removed in commit 5406833a65e7. Now, when this function is called from a few places, it is no longer obvious that the path always starts with /sys/fs/cgroup/, so reinstate the check just to be on the safe side. This check also ensures that elements[3:] can be used safely. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs2/create.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libcontainer/cgroups/fs2/create.go b/libcontainer/cgroups/fs2/create.go index fb5e50507d2..324df0323a4 100644 --- a/libcontainer/cgroups/fs2/create.go +++ b/libcontainer/cgroups/fs2/create.go @@ -2,6 +2,7 @@ package fs2 import ( "bytes" + "fmt" "io/ioutil" "os" "path/filepath" @@ -11,6 +12,10 @@ import ( // CreateCgroupPath creates cgroupv2 path, enabling all the // available controllers in the process. func CreateCgroupPath(path string) (Err error) { + if !strings.HasPrefix(path, UnifiedMountpoint) { + return fmt.Errorf("invalid cgroup path %s", path) + } + const file = UnifiedMountpoint + "/cgroup.controllers" content, err := ioutil.ReadFile(file) if err != nil { From de1134156b4d0f2824feb9d4bb4008b791d6a441 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 7 Apr 2020 12:50:17 -0700 Subject: [PATCH 09/13] cgroups/fs2/CreateCgroupPath: nit This slightly improves code readability. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs2/create.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libcontainer/cgroups/fs2/create.go b/libcontainer/cgroups/fs2/create.go index 324df0323a4..e4be1e3a179 100644 --- a/libcontainer/cgroups/fs2/create.go +++ b/libcontainer/cgroups/fs2/create.go @@ -25,9 +25,10 @@ func CreateCgroupPath(path string) (Err error) { ctrs := bytes.Fields(content) res := append([]byte("+"), bytes.Join(ctrs, []byte(" +"))...) - current := "/sys/fs" elements := strings.Split(path, "/") - for i, e := range elements[3:] { + elements = elements[3:] + current := "/sys/fs" + for i, e := range elements { current = filepath.Join(current, e) if i > 0 { if err := os.Mkdir(current, 0755); err != nil { @@ -44,7 +45,7 @@ func CreateCgroupPath(path string) (Err error) { }() } } - if i < len(elements[3:])-1 { + if i < len(elements)-1 { if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), res, 0755); err != nil { return err } From bb47e358437bdb917a1fb5e24b0882ca6761d83d Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 7 Apr 2020 14:31:54 -0700 Subject: [PATCH 10/13] cgroup/systemd: reorganize 1. Rename the files - v1.go: cgroupv1 aka legacy; - v2.go: cgroupv2 aka unified hierarchy; - unsupported.go: when systemd is not available. 2. Move the code that is common between v1 and v2 to common.go Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/systemd/common.go | 101 ++++++++++++++++++ .../{apply_nosystemd.go => unsupported.go} | 0 .../systemd/{apply_systemd.go => v1.go} | 92 ---------------- .../systemd/{unified_hierarchy.go => v2.go} | 0 4 files changed, 101 insertions(+), 92 deletions(-) create mode 100644 libcontainer/cgroups/systemd/common.go rename libcontainer/cgroups/systemd/{apply_nosystemd.go => unsupported.go} (100%) rename libcontainer/cgroups/systemd/{apply_systemd.go => v1.go} (80%) rename libcontainer/cgroups/systemd/{unified_hierarchy.go => v2.go} (100%) diff --git a/libcontainer/cgroups/systemd/common.go b/libcontainer/cgroups/systemd/common.go new file mode 100644 index 00000000000..eeb0d532816 --- /dev/null +++ b/libcontainer/cgroups/systemd/common.go @@ -0,0 +1,101 @@ +package systemd + +import ( + "fmt" + "os" + "strings" + "sync" + + systemdDbus "github.com/coreos/go-systemd/v22/dbus" + dbus "github.com/godbus/dbus/v5" + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ( + connOnce sync.Once + connDbus *systemdDbus.Conn + connErr error +) + +// NOTE: This function comes from package github.com/coreos/go-systemd/util +// It was borrowed here to avoid a dependency on cgo. +// +// IsRunningSystemd checks whether the host was booted with systemd as its init +// system. This functions similarly to systemd's `sd_booted(3)`: internally, it +// checks whether /run/systemd/system/ exists and is a directory. +// http://www.freedesktop.org/software/systemd/man/sd_booted.html +func IsRunningSystemd() bool { + fi, err := os.Lstat("/run/systemd/system") + if err != nil { + return false + } + return fi.IsDir() +} + +// systemd represents slice hierarchy using `-`, so we need to follow suit when +// generating the path of slice. Essentially, test-a-b.slice becomes +// /test.slice/test-a.slice/test-a-b.slice. +func ExpandSlice(slice string) (string, error) { + suffix := ".slice" + // Name has to end with ".slice", but can't be just ".slice". + if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) { + return "", fmt.Errorf("invalid slice name: %s", slice) + } + + // Path-separators are not allowed. + if strings.Contains(slice, "/") { + return "", fmt.Errorf("invalid slice name: %s", slice) + } + + var path, prefix string + sliceName := strings.TrimSuffix(slice, suffix) + // if input was -.slice, we should just return root now + if sliceName == "-" { + return "/", nil + } + for _, component := range strings.Split(sliceName, "-") { + // test--a.slice isn't permitted, nor is -test.slice. + if component == "" { + return "", fmt.Errorf("invalid slice name: %s", slice) + } + + // Append the component to the path and to the prefix. + path += "/" + prefix + component + suffix + prefix += component + "-" + } + return path, nil +} + +// getDbusConnection lazy initializes systemd dbus connection +// and returns it +func getDbusConnection() (*systemdDbus.Conn, error) { + connOnce.Do(func() { + connDbus, connErr = systemdDbus.New() + }) + return connDbus, connErr +} + +func newProp(name string, units interface{}) systemdDbus.Property { + return systemdDbus.Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +func getUnitName(c *configs.Cgroup) string { + // by default, we create a scope unless the user explicitly asks for a slice. + if !strings.HasSuffix(c.Name, ".slice") { + return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name) + } + return c.Name +} + +// isUnitExists returns true if the error is that a systemd unit already exists. +func isUnitExists(err error) bool { + if err != nil { + if dbusError, ok := err.(dbus.Error); ok { + return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists") + } + } + return false +} diff --git a/libcontainer/cgroups/systemd/apply_nosystemd.go b/libcontainer/cgroups/systemd/unsupported.go similarity index 100% rename from libcontainer/cgroups/systemd/apply_nosystemd.go rename to libcontainer/cgroups/systemd/unsupported.go diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/v1.go similarity index 80% rename from libcontainer/cgroups/systemd/apply_systemd.go rename to libcontainer/cgroups/systemd/v1.go index dca4870c894..545dbf1c75f 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/v1.go @@ -4,7 +4,6 @@ package systemd import ( "errors" - "fmt" "io/ioutil" "math" "os" @@ -14,7 +13,6 @@ import ( "time" systemdDbus "github.com/coreos/go-systemd/v22/dbus" - dbus "github.com/godbus/dbus/v5" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/configs" @@ -65,43 +63,6 @@ var legacySubsystems = subsystemSet{ &fs.NameGroup{GroupName: "name=systemd"}, } -var ( - connOnce sync.Once - connDbus *systemdDbus.Conn - connErr error -) - -func newProp(name string, units interface{}) systemdDbus.Property { - return systemdDbus.Property{ - Name: name, - Value: dbus.MakeVariant(units), - } -} - -// NOTE: This function comes from package github.com/coreos/go-systemd/util -// It was borrowed here to avoid a dependency on cgo. -// -// IsRunningSystemd checks whether the host was booted with systemd as its init -// system. This functions similarly to systemd's `sd_booted(3)`: internally, it -// checks whether /run/systemd/system/ exists and is a directory. -// http://www.freedesktop.org/software/systemd/man/sd_booted.html -func IsRunningSystemd() bool { - fi, err := os.Lstat("/run/systemd/system") - if err != nil { - return false - } - return fi.IsDir() -} - -// getDbusConnection lazy initializes systemd dbus connection -// and returns it -func getDbusConnection() (*systemdDbus.Conn, error) { - connOnce.Do(func() { - connDbus, connErr = systemdDbus.New() - }) - return connDbus, connErr -} - func (m *LegacyManager) Apply(pid int) error { var ( c = m.Cgroups @@ -331,40 +292,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error { return nil } -// systemd represents slice hierarchy using `-`, so we need to follow suit when -// generating the path of slice. Essentially, test-a-b.slice becomes -// /test.slice/test-a.slice/test-a-b.slice. -func ExpandSlice(slice string) (string, error) { - suffix := ".slice" - // Name has to end with ".slice", but can't be just ".slice". - if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) { - return "", fmt.Errorf("invalid slice name: %s", slice) - } - - // Path-separators are not allowed. - if strings.Contains(slice, "/") { - return "", fmt.Errorf("invalid slice name: %s", slice) - } - - var path, prefix string - sliceName := strings.TrimSuffix(slice, suffix) - // if input was -.slice, we should just return root now - if sliceName == "-" { - return "/", nil - } - for _, component := range strings.Split(sliceName, "-") { - // test--a.slice isn't permitted, nor is -test.slice. - if component == "" { - return "", fmt.Errorf("invalid slice name: %s", slice) - } - - // Append the component to the path and to the prefix. - path += "/" + prefix + component + suffix - prefix += component + "-" - } - return path, nil -} - func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { mountpoint, err := cgroups.FindCgroupMountpoint(c.Path, subsystem) if err != nil { @@ -469,14 +396,6 @@ func (m *LegacyManager) Set(container *configs.Config) error { return nil } -func getUnitName(c *configs.Cgroup) string { - // by default, we create a scope unless the user explicitly asks for a slice. - if !strings.HasSuffix(c.Name, ".slice") { - return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name) - } - return c.Name -} - func setKernelMemory(c *configs.Cgroup) error { path, err := getSubsystemPath(c, "memory") if err != nil && !cgroups.IsNotFound(err) { @@ -497,17 +416,6 @@ func setKernelMemory(c *configs.Cgroup) error { } return fs.EnableKernelMemoryAccounting(path) } - -// isUnitExists returns true if the error is that a systemd unit already exists. -func isUnitExists(err error) bool { - if err != nil { - if dbusError, ok := err.(dbus.Error); ok { - return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists") - } - } - return false -} - func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) { return m.Cgroups, nil } diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/v2.go similarity index 100% rename from libcontainer/cgroups/systemd/unified_hierarchy.go rename to libcontainer/cgroups/systemd/v2.go From 4b4bc995ad4ee4679ffbe38f1672be01c24256fc Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 8 Apr 2020 03:37:47 -0700 Subject: [PATCH 11/13] CreateCgroupPath: only enable needed controllers 1. Instead of enabling all available controllers, figure out which ones are required, and only enable those. 2. Amend all setFoo() functions to call isFooSet(). While this might seem unnecessary, it might actually help to uncover a bug. Imagine someone: - adds a cgroup.Resources.CpuFoo setting; - modifies setCpu() to apply the new setting; - but forgets to amend isCpuSet() accordingly <-- BUG In this case, a test case modifying CpuFoo will help to uncover the BUG. This is the reason why it's added. This patch *could be* amended by enabling controllers on a best-effort basis, i.e. : - do not return an error early if we can't enable some controllers; - if we fail to enable all controllers at once (usually because one of them can't be enabled), try enabling them one by one. Currently this is not implemented, and it's not clear whether this would be a good way to go or not. [v2: add/use is${Controller}Set() functions] [v3: document neededControllers()] [v4: drop "best-effort" part] Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs2/cpu.go | 8 ++++ libcontainer/cgroups/fs2/cpuset.go | 8 ++++ libcontainer/cgroups/fs2/create.go | 74 +++++++++++++++++++++++++---- libcontainer/cgroups/fs2/fs2.go | 2 +- libcontainer/cgroups/fs2/hugetlb.go | 7 +++ libcontainer/cgroups/fs2/io.go | 12 +++++ libcontainer/cgroups/fs2/memory.go | 8 ++++ libcontainer/cgroups/fs2/pids.go | 7 +++ libcontainer/cgroups/systemd/v2.go | 2 +- 9 files changed, 117 insertions(+), 11 deletions(-) diff --git a/libcontainer/cgroups/fs2/cpu.go b/libcontainer/cgroups/fs2/cpu.go index 8ec855714f6..7129cf79c4d 100644 --- a/libcontainer/cgroups/fs2/cpu.go +++ b/libcontainer/cgroups/fs2/cpu.go @@ -13,7 +13,15 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" ) +func isCpuSet(cgroup *configs.Cgroup) bool { + return cgroup.Resources.CpuWeight != 0 || cgroup.Resources.CpuMax != "" +} + func setCpu(dirPath string, cgroup *configs.Cgroup) error { + if !isCpuSet(cgroup) { + return nil + } + // NOTE: .CpuShares is not used here. Conversion is the caller's responsibility. if cgroup.Resources.CpuWeight != 0 { if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(cgroup.Resources.CpuWeight, 10)); err != nil { diff --git a/libcontainer/cgroups/fs2/cpuset.go b/libcontainer/cgroups/fs2/cpuset.go index 6492ac93c58..fb4456b43c1 100644 --- a/libcontainer/cgroups/fs2/cpuset.go +++ b/libcontainer/cgroups/fs2/cpuset.go @@ -7,7 +7,15 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" ) +func isCpusetSet(cgroup *configs.Cgroup) bool { + return cgroup.Resources.CpusetCpus != "" || cgroup.Resources.CpusetMems != "" +} + func setCpuset(dirPath string, cgroup *configs.Cgroup) error { + if !isCpusetSet(cgroup) { + return nil + } + if cgroup.Resources.CpusetCpus != "" { if err := fscommon.WriteFile(dirPath, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil { return err diff --git a/libcontainer/cgroups/fs2/create.go b/libcontainer/cgroups/fs2/create.go index e4be1e3a179..0cc2c1f61f2 100644 --- a/libcontainer/cgroups/fs2/create.go +++ b/libcontainer/cgroups/fs2/create.go @@ -1,29 +1,80 @@ package fs2 import ( - "bytes" "fmt" "io/ioutil" "os" "path/filepath" "strings" + + "github.com/opencontainers/runc/libcontainer/configs" ) +// neededControllers returns the string to write to cgroup.subtree_control, +// containing the list of controllers to enable (for example, "+cpu +pids"), +// based on (1) controllers available and (2) resources that are being set. +// +// The resulting string does not include "pseudo" controllers such as +// "freezer" and "devices". +func neededControllers(cgroup *configs.Cgroup) ([]string, error) { + var list []string + + if cgroup == nil { + return list, nil + } + + // list of all available controllers + const file = UnifiedMountpoint + "/cgroup.controllers" + content, err := ioutil.ReadFile(file) + if err != nil { + return list, err + } + avail := make(map[string]struct{}) + for _, ctr := range strings.Fields(string(content)) { + avail[ctr] = struct{}{} + } + + // add the controller if available + add := func(controller string) { + if _, ok := avail[controller]; ok { + list = append(list, "+"+controller) + } + } + + if isPidsSet(cgroup) { + add("pids") + } + if isMemorySet(cgroup) { + add("memory") + } + if isIoSet(cgroup) { + add("io") + } + if isCpuSet(cgroup) { + add("cpu") + } + if isCpusetSet(cgroup) { + add("cpuset") + } + if isHugeTlbSet(cgroup) { + add("hugetlb") + } + + return list, nil +} + // CreateCgroupPath creates cgroupv2 path, enabling all the -// available controllers in the process. -func CreateCgroupPath(path string) (Err error) { +// needed controllers in the process. +func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) { if !strings.HasPrefix(path, UnifiedMountpoint) { return fmt.Errorf("invalid cgroup path %s", path) } - const file = UnifiedMountpoint + "/cgroup.controllers" - content, err := ioutil.ReadFile(file) + ctrs, err := neededControllers(c) if err != nil { return err } - - ctrs := bytes.Fields(content) - res := append([]byte("+"), bytes.Join(ctrs, []byte(" +"))...) + allCtrs := strings.Join(ctrs, " ") elements := strings.Split(path, "/") elements = elements[3:] @@ -45,11 +96,16 @@ func CreateCgroupPath(path string) (Err error) { }() } } + // enable needed controllers if i < len(elements)-1 { - if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), res, 0755); err != nil { + file := filepath.Join(current, "cgroup.subtree_control") + if err := ioutil.WriteFile(file, []byte(allCtrs), 0755); err != nil { + // XXX: we can enable _some_ controllers doing it one-by one + // instead of erroring out -- does it makes sense to do so? return err } } } + return nil } diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index f32f647d805..d845a1b5ebe 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -66,7 +66,7 @@ func (m *manager) getControllers() error { } func (m *manager) Apply(pid int) error { - if err := CreateCgroupPath(m.dirPath); err != nil { + if err := CreateCgroupPath(m.dirPath, m.config); err != nil { return err } if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless { diff --git a/libcontainer/cgroups/fs2/hugetlb.go b/libcontainer/cgroups/fs2/hugetlb.go index c7610c0a414..4a399aaecd5 100644 --- a/libcontainer/cgroups/fs2/hugetlb.go +++ b/libcontainer/cgroups/fs2/hugetlb.go @@ -15,7 +15,14 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" ) +func isHugeTlbSet(cgroup *configs.Cgroup) bool { + return len(cgroup.Resources.HugetlbLimit) > 0 +} + func setHugeTlb(dirPath string, cgroup *configs.Cgroup) error { + if !isHugeTlbSet(cgroup) { + return nil + } for _, hugetlb := range cgroup.Resources.HugetlbLimit { if err := fscommon.WriteFile(dirPath, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "max"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil { return err diff --git a/libcontainer/cgroups/fs2/io.go b/libcontainer/cgroups/fs2/io.go index 677c8d286cf..bbe3ac064b3 100644 --- a/libcontainer/cgroups/fs2/io.go +++ b/libcontainer/cgroups/fs2/io.go @@ -14,7 +14,19 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" ) +func isIoSet(cgroup *configs.Cgroup) bool { + return cgroup.Resources.BlkioWeight != 0 || + len(cgroup.Resources.BlkioThrottleReadBpsDevice) > 0 || + len(cgroup.Resources.BlkioThrottleWriteBpsDevice) > 0 || + len(cgroup.Resources.BlkioThrottleReadIOPSDevice) > 0 || + len(cgroup.Resources.BlkioThrottleWriteIOPSDevice) > 0 +} + func setIo(dirPath string, cgroup *configs.Cgroup) error { + if !isIoSet(cgroup) { + return nil + } + if cgroup.Resources.BlkioWeight != 0 { filename := "io.bfq.weight" if err := fscommon.WriteFile(dirPath, filename, diff --git a/libcontainer/cgroups/fs2/memory.go b/libcontainer/cgroups/fs2/memory.go index 09a92d468b9..681bedd111e 100644 --- a/libcontainer/cgroups/fs2/memory.go +++ b/libcontainer/cgroups/fs2/memory.go @@ -32,7 +32,15 @@ func numToStr(value int64) (ret string) { return ret } +func isMemorySet(cgroup *configs.Cgroup) bool { + return cgroup.Resources.MemoryReservation != 0 || + cgroup.Resources.Memory != 0 || cgroup.Resources.MemorySwap != 0 +} + func setMemory(dirPath string, cgroup *configs.Cgroup) error { + if !isMemorySet(cgroup) { + return nil + } swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(cgroup.Resources.MemorySwap, cgroup.Resources.Memory) if err != nil { return err diff --git a/libcontainer/cgroups/fs2/pids.go b/libcontainer/cgroups/fs2/pids.go index 0cf860f9c7c..b8153d28398 100644 --- a/libcontainer/cgroups/fs2/pids.go +++ b/libcontainer/cgroups/fs2/pids.go @@ -14,7 +14,14 @@ import ( "golang.org/x/sys/unix" ) +func isPidsSet(cgroup *configs.Cgroup) bool { + return cgroup.Resources.PidsLimit != 0 +} + func setPids(dirPath string, cgroup *configs.Cgroup) error { + if !isPidsSet(cgroup) { + return nil + } if val := numToStr(cgroup.Resources.PidsLimit); val != "" { if err := fscommon.WriteFile(dirPath, "pids.max", val); err != nil { return err diff --git a/libcontainer/cgroups/systemd/v2.go b/libcontainer/cgroups/systemd/v2.go index 18280371d19..130fefb645e 100644 --- a/libcontainer/cgroups/systemd/v2.go +++ b/libcontainer/cgroups/systemd/v2.go @@ -151,7 +151,7 @@ func (m *unifiedManager) Apply(pid int) error { if err != nil { return err } - if err := fs2.CreateCgroupPath(m.path); err != nil { + if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil { return err } return nil From 992d5cadfbeee52ac6a781cb68b9ec888a57f865 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 14 Apr 2020 16:18:15 -0700 Subject: [PATCH 12/13] travis: enable fs2 driver test on fedora Run in the same environment as systemd tests. Disable CRIU tests because: - they all fail with cgroup v2; - CRIU v3.14 is required and it's not yet released, and rebuilding it from sources with patches applied (like it is currently done in Dockerfile) is too much work. Signed-off-by: Kir Kolyshkin --- .travis.yml | 2 ++ tests/integration/checkpoint.bats | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index 84213eadb28..34e63ce80af 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,6 +36,8 @@ matrix: - sudo ssh default sudo podman run --privileged --cgroupns=private -v /lib/modules:/lib/modules:ro test make localunittest # cgroupv2+systemd: test on vagrant host itself as we need systemd - sudo ssh default -t 'cd /vagrant && sudo make localintegration RUNC_USE_SYSTEMD=yes' + # same setup but with fs2 driver instead of systemd + - sudo ssh default -t 'cd /vagrant && sudo make localintegration' allow_failures: - go: tip diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index 87696df6ee9..6c11d083eb1 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -6,6 +6,8 @@ function setup() { if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then skip "CRIU test suite is skipped on systemd cgroup driver for now." fi + # All checkpoint tests are currently failing on v2 + requires cgroups_v1 teardown_busybox setup_busybox From ab276b1c0914c475e9c900e80862e4b11ba21fe5 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 19 Apr 2020 16:02:12 -0700 Subject: [PATCH 13/13] cgroups/fs2/Destroy: use Remove, ignore ENOENT 1. There is no need to try removing it recursively. 2. Do not treat ENOENT as an error (similar to fs and systemd v1 drivers). Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/fs2/fs2.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index d845a1b5ebe..93a2aa59d1a 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -142,7 +142,10 @@ func (m *manager) Freeze(state configs.FreezerState) error { } func (m *manager) Destroy() error { - return os.RemoveAll(m.dirPath) + if err := os.Remove(m.dirPath); err != nil && !os.IsNotExist(err) { + return err + } + return nil } // GetPaths is for compatibility purpose and should be removed in future