diff --git a/go.mod b/go.mod index c0b146b61fc..3dab69a75d2 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/moby/sys/user v0.3.0 github.com/moby/sys/userns v0.1.0 github.com/mrunalp/fileutils v0.5.1 - github.com/opencontainers/cgroups v0.0.1 + github.com/opencontainers/cgroups v0.0.4 github.com/opencontainers/runtime-spec v1.2.1 github.com/opencontainers/selinux v1.11.1 github.com/seccomp/libseccomp-golang v0.10.0 diff --git a/go.sum b/go.sum index 1503380cef5..60c410620c1 100644 --- a/go.sum +++ b/go.sum @@ -49,8 +49,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q= github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= -github.com/opencontainers/cgroups v0.0.1 h1:MXjMkkFpKv6kpuirUa4USFBas573sSAY082B4CiHEVA= -github.com/opencontainers/cgroups v0.0.1/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs= +github.com/opencontainers/cgroups v0.0.4 h1:XVj8P/IHVms/j+7eh8ggdkTLAxjz84ZzuFyGoE28DR4= +github.com/opencontainers/cgroups v0.0.4/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs= github.com/opencontainers/runtime-spec v1.2.1 h1:S4k4ryNgEpxW1dzyqffOmhI1BHYcjzU8lpJfSlR0xww= github.com/opencontainers/runtime-spec v1.2.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.11.1 h1:nHFvthhM0qY8/m+vfhJylliSshm8G1jJ2jDMcgULaH8= diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 539726dfdc8..94b55eaa857 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -164,6 +164,10 @@ func loadState(root string) (*State, error) { if err := json.NewDecoder(f).Decode(&state); err != nil { return nil, err } + // Cgroup v1 fs manager expect Resources to never be nil. + if state.Config.Cgroups.Resources == nil { + state.Config.Cgroups.Resources = &cgroups.Resources{} + } return state, nil } diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index a2b207109d5..ef7b74fea76 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -328,7 +328,7 @@ convert_hugetlb_size() { check_systemd_value "MemoryHigh" 5242880 check_systemd_value "MemoryMax" 10485760 check_systemd_value "TasksMax" 99 - check_cpu_quota 10000 100000 "100ms" + check_cpu_quota 10000 100000 check_cpu_weight 42 } @@ -390,7 +390,7 @@ convert_hugetlb_size() { [ "$output" = '42' ] check_systemd_value "TasksMax" 42 - check_cpu_quota 5000 50000 "100ms" + check_cpu_quota 5000 50000 check_cpu_weight 42 } diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index 1d69ebabf9d..40b01ccbf03 100755 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -302,7 +302,33 @@ function check_systemd_value() { function check_cpu_quota() { local quota=$1 local period=$2 - local sd_quota=$3 + local sd_quota + + if [ -v RUNC_USE_SYSTEMD ]; then + if [ "$quota" = "-1" ]; then + sd_quota="infinity" + else + # In systemd world, quota (CPUQuotaPerSec) is measured in ms + # (per second), and systemd rounds it up to 10ms. For example, + # given quota=4000 and period=10000, systemd value is 400ms. + # + # Calculate milliseconds (quota/period * 1000). + # First multiply by 1000 to get milliseconds, + # then add half of period for proper rounding. + local ms=$(((quota * 1000 + period / 2) / period)) + # Round up to nearest 10ms. + ms=$(((ms + 5) / 10 * 10)) + sd_quota="${ms}ms" + + # Recalculate quota based on systemd value. + # Convert ms back to quota units. + quota=$((ms * period / 1000)) + + fi + + # Systemd values are the same for v1 and v2. + check_systemd_value "CPUQuotaPerSecUSec" "$sd_quota" + fi if [ -v CGROUP_V2 ]; then if [ "$quota" = "-1" ]; then @@ -313,8 +339,6 @@ function check_cpu_quota() { check_cgroup_value "cpu.cfs_quota_us" "$quota" check_cgroup_value "cpu.cfs_period_us" "$period" fi - # systemd values are the same for v1 and v2 - check_systemd_value "CPUQuotaPerSecUSec" "$sd_quota" # CPUQuotaPeriodUSec requires systemd >= v242 [ "$(systemd_version)" -lt 242 ] && return @@ -344,7 +368,18 @@ function check_cpu_shares() { local shares=$1 if [ -v CGROUP_V2 ]; then - local weight=$((1 + ((shares - 2) * 9999) / 262142)) + # Same formula as ConvertCPUSharesToCgroupV2Value. + local weight + weight=$(awk -v shares="$shares" ' + BEGIN { + if (shares == 0) { print 0; exit } + if (shares <= 2) { print 1; exit } + if (shares >= 262144) { print 10000; exit } + l = log(shares) / log(2) + exponent = (l*l + 125*l) / 612.0 - 7.0/34.0 + print int(exp(exponent * log(10)) + 0.99) + }') + check_cpu_weight "$weight" else check_cgroup_value "cpu.shares" "$shares" diff --git a/tests/integration/update.bats b/tests/integration/update.bats index 034e959fc15..461fac479e3 100644 --- a/tests/integration/update.bats +++ b/tests/integration/update.bats @@ -263,26 +263,26 @@ EOF runc run -d --console-socket "$CONSOLE_SOCKET" test_update [ "$status" -eq 0 ] - # check that initial values were properly set - check_cpu_quota 500000 1000000 "500ms" + # Check that initial values were properly set. + check_cpu_quota 500000 1000000 check_cpu_shares 100 - # update cpu period + # Update cpu period. runc update test_update --cpu-period 900000 [ "$status" -eq 0 ] - check_cpu_quota 500000 900000 "560ms" + check_cpu_quota 500000 900000 - # update cpu quota + # Update cpu quota. runc update test_update --cpu-quota 600000 [ "$status" -eq 0 ] - check_cpu_quota 600000 900000 "670ms" + check_cpu_quota 600000 900000 - # remove cpu quota + # Remove cpu quota. runc update test_update --cpu-quota -1 [ "$status" -eq 0 ] - check_cpu_quota -1 900000 "infinity" + check_cpu_quota -1 900000 - # update cpu-shares + # Update cpu-shares. runc update test_update --cpu-share 200 [ "$status" -eq 0 ] check_cpu_shares 200 @@ -298,21 +298,21 @@ EOF } EOF [ "$status" -eq 0 ] - check_cpu_quota 500000 1000000 "500ms" + check_cpu_quota 500000 1000000 - # redo all the changes at once + # Redo all the changes at once. runc update test_update \ --cpu-period 900000 --cpu-quota 600000 --cpu-share 200 [ "$status" -eq 0 ] - check_cpu_quota 600000 900000 "670ms" + check_cpu_quota 600000 900000 check_cpu_shares 200 - # remove cpu quota and reset the period + # Remove cpu quota and reset the period. runc update test_update --cpu-quota -1 --cpu-period 100000 [ "$status" -eq 0 ] - check_cpu_quota -1 100000 "infinity" + check_cpu_quota -1 100000 - # reset to initial test value via json file + # Reset to initial test values via json file. cat <"$BATS_RUN_TMPDIR"/runc-cgroups-integration-test.json { "cpu": { @@ -326,7 +326,7 @@ EOF runc update -r "$BATS_RUN_TMPDIR"/runc-cgroups-integration-test.json test_update [ "$status" -eq 0 ] - check_cpu_quota 500000 1000000 "500ms" + check_cpu_quota 500000 1000000 check_cpu_shares 100 } @@ -363,7 +363,7 @@ EOF runc run -d --console-socket "$CONSOLE_SOCKET" test_update [ "$status" -eq 0 ] - check_cpu_quota -1 1000000 "infinity" + check_cpu_quota -1 1000000 } @test "set cpu period with no quota (invalid period)" { @@ -382,7 +382,7 @@ EOF runc run -d --console-socket "$CONSOLE_SOCKET" test_update [ "$status" -eq 0 ] - check_cpu_quota 5000 100000 "50ms" + check_cpu_quota 5000 100000 } @test "update cpu period with no previous period/quota set" { @@ -393,10 +393,10 @@ EOF runc run -d --console-socket "$CONSOLE_SOCKET" test_update [ "$status" -eq 0 ] - # update the period alone, no old values were set + # Update the period alone, no old values were set. runc update --cpu-period 50000 test_update [ "$status" -eq 0 ] - check_cpu_quota -1 50000 "infinity" + check_cpu_quota -1 50000 } @test "update cpu quota with no previous period/quota set" { @@ -407,10 +407,10 @@ EOF runc run -d --console-socket "$CONSOLE_SOCKET" test_update [ "$status" -eq 0 ] - # update the quota alone, no old values were set + # Update the quota alone, no old values were set. runc update --cpu-quota 30000 test_update [ "$status" -eq 0 ] - check_cpu_quota 30000 100000 "300ms" + check_cpu_quota 30000 100000 } @test "update cpu period in a pod cgroup with pod limit set" { @@ -445,7 +445,7 @@ EOF # Finally, the test itself: set 30% limit but with lower period. runc update --cpu-period 10000 --cpu-quota 3000 test_update [ "$status" -eq 0 ] - check_cpu_quota 3000 10000 "300ms" + check_cpu_quota 3000 10000 } @test "update cgroup cpu.idle" { @@ -545,10 +545,9 @@ EOF runc run -d --console-socket "$CONSOLE_SOCKET" test_update [ "$status" -eq 0 ] - # check that initial values were properly set - check_cpu_quota 500000 1000000 "500ms" - # initial cpu shares of 100 corresponds to weight of 4 - check_cpu_weight 4 + # Check that initial values (from setup) were properly set. + check_cpu_quota 500000 1000000 + check_cpu_shares 100 check_systemd_value "TasksMax" 20 runc update -r - test_update < '9' { // First non-digit: cut the tail. str = str[:i] @@ -280,7 +280,9 @@ func systemdVersionAtoi(str string) (int, error) { return ver, nil } -func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) { +// addCPUQuota adds CPUQuotaPeriodUSec and CPUQuotaPerSecUSec to the properties. The passed quota may be modified +// along with round-up during calculation in order to write the same value to cgroupfs later. +func addCPUQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota *int64, period uint64) { if period != 0 { // systemd only supports CPUQuotaPeriodUSec since v242 sdVer := systemdVersion(cm) @@ -292,10 +294,10 @@ func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota " (setting will still be applied to cgroupfs)", sdVer) } } - if quota != 0 || period != 0 { + if *quota != 0 || period != 0 { // corresponds to USEC_INFINITY in systemd cpuQuotaPerSecUSec := uint64(math.MaxUint64) - if quota > 0 { + if *quota > 0 { if period == 0 { // assume the default period = defCPUQuotaPeriod @@ -304,9 +306,11 @@ func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota // (integer percentage of CPU) internally. This means that if a fractional percent of // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. - cpuQuotaPerSecUSec = uint64(quota*1000000) / period + cpuQuotaPerSecUSec = uint64(*quota*1000000) / period if cpuQuotaPerSecUSec%10000 != 0 { cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 + // Update the requested quota along with the round-up in order to write the same value to cgroupfs. + *quota = int64(cpuQuotaPerSecUSec) * int64(period) / 1000000 } } *properties = append(*properties, diff --git a/vendor/github.com/opencontainers/cgroups/systemd/v1.go b/vendor/github.com/opencontainers/cgroups/systemd/v1.go index 8453e9b4962..b8959adbfa6 100644 --- a/vendor/github.com/opencontainers/cgroups/systemd/v1.go +++ b/vendor/github.com/opencontainers/cgroups/systemd/v1.go @@ -90,7 +90,7 @@ func genV1ResourcesProperties(r *cgroups.Resources, cm *dbusConnManager) ([]syst newProp("CPUShares", r.CpuShares)) } - addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod) + addCPUQuota(cm, &properties, &r.CpuQuota, r.CpuPeriod) if r.BlkioWeight != 0 { properties = append(properties, @@ -334,6 +334,9 @@ func (m *LegacyManager) Set(r *cgroups.Resources) error { if r.Unified != nil { return cgroups.ErrV1NoUnified } + // Use a copy since CpuQuota in r may be modified. + rCopy := *r + r = &rCopy properties, err := genV1ResourcesProperties(r, m.dbus) if err != nil { return err diff --git a/vendor/github.com/opencontainers/cgroups/systemd/v2.go b/vendor/github.com/opencontainers/cgroups/systemd/v2.go index 42a6e355119..636b9cb01b5 100644 --- a/vendor/github.com/opencontainers/cgroups/systemd/v2.go +++ b/vendor/github.com/opencontainers/cgroups/systemd/v2.go @@ -113,7 +113,7 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err) } } - addCpuQuota(cm, &props, quota, period) + addCPUQuota(cm, &props, "a, period) case "cpu.weight": if shouldSetCPUIdle(cm, strings.TrimSpace(res["cpu.idle"])) { @@ -254,7 +254,7 @@ func genV2ResourcesProperties(dirPath string, r *cgroups.Resources, cm *dbusConn } } - addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod) + addCPUQuota(cm, &properties, &r.CpuQuota, r.CpuPeriod) if r.PidsLimit > 0 || r.PidsLimit == -1 { properties = append(properties, @@ -480,6 +480,9 @@ func (m *UnifiedManager) Set(r *cgroups.Resources) error { if r == nil { return nil } + // Use a copy since CpuQuota in r may be modified. + rCopy := *r + r = &rCopy properties, err := genV2ResourcesProperties(m.fsMgr.Path(""), r, m.dbus) if err != nil { return err diff --git a/vendor/github.com/opencontainers/cgroups/utils.go b/vendor/github.com/opencontainers/cgroups/utils.go index 9ef24b1ac6e..95b3310ab6e 100644 --- a/vendor/github.com/opencontainers/cgroups/utils.go +++ b/vendor/github.com/opencontainers/cgroups/utils.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "math" "os" "path/filepath" "strconv" @@ -231,7 +232,7 @@ func rmdir(path string, retry bool) error { again: err := unix.Rmdir(path) - switch err { // nolint:errorlint // unix errors are bare + switch err { case nil, unix.ENOENT: return nil case unix.EINTR: @@ -395,7 +396,7 @@ func WriteCgroupProc(dir string, pid int) error { } defer file.Close() - for i := 0; i < 5; i++ { + for range 5 { _, err = file.WriteString(strconv.Itoa(pid)) if err == nil { return nil @@ -413,16 +414,30 @@ func WriteCgroupProc(dir string, pid int) error { return err } -// Since the OCI spec is designed for cgroup v1, in some cases -// there is need to convert from the cgroup v1 configuration to cgroup v2 -// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142) -// convert from [2-262144] to [1-10000] -// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)" +// ConvertCPUSharesToCgroupV2Value converts CPU shares, used by cgroup v1, +// to CPU weight, used by cgroup v2. +// +// Cgroup v1 CPU shares has a range of [2^1...2^18], i.e. [2...262144], +// and the default value is 1024. +// +// Cgroup v2 CPU weight has a range of [10^0...10^4], i.e. [1...10000], +// and the default value is 100. func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 { + // The value of 0 means "unset". if cpuShares == 0 { return 0 } - return (1 + ((cpuShares-2)*9999)/262142) + if cpuShares <= 2 { + return 1 + } + if cpuShares >= 262144 { + return 10000 + } + l := math.Log2(float64(cpuShares)) + // Quadratic function which fits min, max, and default. + exponent := (l*l+125*l)/612.0 - 7.0/34.0 + + return uint64(math.Ceil(math.Pow(10, exponent))) } // ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec diff --git a/vendor/github.com/opencontainers/cgroups/v1_utils.go b/vendor/github.com/opencontainers/cgroups/v1_utils.go index 81193e20983..19b8af1344b 100644 --- a/vendor/github.com/opencontainers/cgroups/v1_utils.go +++ b/vendor/github.com/opencontainers/cgroups/v1_utils.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "slices" "strings" "sync" "syscall" @@ -144,10 +145,8 @@ func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) { for _, mi := range mounts { if strings.HasPrefix(mi.Mountpoint, cgroupPath) { - for _, opt := range strings.Split(mi.VFSOptions, ",") { - if opt == subsystem { - return mi.Mountpoint, mi.Root, nil - } + if slices.Contains(strings.Split(mi.VFSOptions, ","), subsystem) { + return mi.Mountpoint, mi.Root, nil } } } diff --git a/vendor/modules.txt b/vendor/modules.txt index 0ead139c0c5..c7f7491d3de 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -51,7 +51,7 @@ github.com/moby/sys/userns # github.com/mrunalp/fileutils v0.5.1 ## explicit; go 1.13 github.com/mrunalp/fileutils -# github.com/opencontainers/cgroups v0.0.1 +# github.com/opencontainers/cgroups v0.0.4 ## explicit; go 1.23.0 github.com/opencontainers/cgroups github.com/opencontainers/cgroups/devices