From f2db87986cab5a7c98b8d9b875bb54e0e7a44219 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 9 Jul 2021 11:18:00 -0700 Subject: [PATCH] libct/cg/sd/v1: Set: avoid unnecessary freeze/thaw Introduce freezeBeforeSet, which contains the logic of figuring out whether we need to freeze/thaw around setting systemd unit properties. In particular, if SkipDevices is set, and the current unit properties allow all devices, there is no need to freeze and thaw, as systemd won't write any device rules in this case. Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/systemd/common.go | 9 +++ libcontainer/cgroups/systemd/v1.go | 87 ++++++++++++++++++++------ 2 files changed, 78 insertions(+), 18 deletions(-) diff --git a/libcontainer/cgroups/systemd/common.go b/libcontainer/cgroups/systemd/common.go index 6edcaebf76f..125f6f5562e 100644 --- a/libcontainer/cgroups/systemd/common.go +++ b/libcontainer/cgroups/systemd/common.go @@ -389,6 +389,15 @@ func resetFailedUnit(cm *dbusConnManager, name string) { } } +func getUnitProperty(cm *dbusConnManager, unitName string, propertyName string) (*systemdDbus.Property, error) { + var prop *systemdDbus.Property + err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) { + prop, Err = c.GetUnitPropertyContext(context.TODO(), unitName, propertyName) + return Err + }) + return prop, err +} + func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error { return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...) diff --git a/libcontainer/cgroups/systemd/v1.go b/libcontainer/cgroups/systemd/v1.go index 936ec4bc57e..1a8e1e3c6c1 100644 --- a/libcontainer/cgroups/systemd/v1.go +++ b/libcontainer/cgroups/systemd/v1.go @@ -10,6 +10,7 @@ import ( "sync" systemdDbus "github.com/coreos/go-systemd/v22/dbus" + "github.com/godbus/dbus/v5" "github.com/sirupsen/logrus" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -331,6 +332,61 @@ func (m *legacyManager) GetStats() (*cgroups.Stats, error) { return stats, nil } +// freezeBeforeSet answers whether there is a need to freeze the cgroup before +// applying its systemd unit properties, and thaw after, while avoiding +// unnecessary freezer state changes. +// +// The reason why we have to freeze is that systemd's application of device +// rules is done disruptively, resulting in spurious errors to common devices +// (unlike our fs driver, they will happily write deny-all rules to running +// containers). So we have to freeze the container to avoid the container get +// an occasional "permission denied" error. +func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (needsFreeze, needsThaw bool, err error) { + // Special case for SkipDevices, as used by Kubernetes to create pod + // cgroups with allow-all device policy). + if r.SkipDevices { + // No need to freeze if SkipDevices is set, and either + // (1) systemd unit does not (yet) exist, or + // (2) it has DevicePolicy=auto and empty DeviceAllow list. + // + // Interestingly, (1) and (2) are the same here because + // a non-existent unit returns default properties, + // and settings in (2) are the defaults. + // + // Do not return errors from getUnitProperty, as they alone + // should not prevent Set from working. + devPolicy, e := getUnitProperty(m.dbus, unitName, "DevicePolicy") + if e == nil && devPolicy.Value == dbus.MakeVariant("auto") { + devAllow, e := getUnitProperty(m.dbus, unitName, "DeviceAllow") + if e == nil && devAllow.Value == dbus.MakeVariant([]deviceAllowEntry{}) { + needsFreeze = false + needsThaw = false + return + } + } + } + + needsFreeze = true + needsThaw = true + + // Check the current freezer state. + freezerState, err := m.GetFreezerState() + if err != nil { + return + } + if freezerState == configs.Frozen { + // Already frozen, and should stay frozen. + needsFreeze = false + needsThaw = false + } + + if r.Freezer == configs.Frozen { + // Will be frozen anyway -- no need to thaw. + needsThaw = false + } + return +} + func (m *legacyManager) Set(r *configs.Resources) error { // If Paths are set, then we are just joining cgroups paths // and there is no need to set any values. @@ -345,29 +401,24 @@ func (m *legacyManager) Set(r *configs.Resources) error { return err } - // Figure out the current freezer state, so we can revert to it after we - // temporarily freeze the container. - targetFreezerState, err := m.GetFreezerState() + unitName := getUnitName(m.cgroups) + needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r) if err != nil { return err } - if targetFreezerState == configs.Undefined { - targetFreezerState = configs.Thawed - } - // We have to freeze the container while systemd sets the cgroup settings. - // The reason for this is that systemd's application of DeviceAllow rules - // is done disruptively, resulting in spurrious errors to common devices - // (unlike our fs driver, they will happily write deny-all rules to running - // containers). So we freeze the container to avoid them hitting the cgroup - // error. But if the freezer cgroup isn't supported, we just warn about it. - if err := m.doFreeze(configs.Frozen); err != nil { - logrus.Infof("freeze container before SetUnitProperties failed: %v", err) + if needsFreeze { + if err := m.doFreeze(configs.Frozen); err != nil { + // If freezer cgroup isn't supported, we just warn about it. + logrus.Infof("freeze container before SetUnitProperties failed: %v", err) + } + } + setErr := setUnitProperties(m.dbus, unitName, properties...) + if needsThaw { + if err := m.doFreeze(configs.Thawed); err != nil { + logrus.Infof("thaw container after SetUnitProperties failed: %v", err) + } } - - setErr := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...) - - _ = m.doFreeze(targetFreezerState) if setErr != nil { return setErr }