Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ require (
github.com/google/btree v1.1.3 // indirect
github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/moby/sys/userns v0.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,6 @@ github.com/minio/minio-go v0.0.0-20190131015406-c8a261de75c1 h1:jw16EimP5oAEM/2w
github.com/minio/minio-go v0.0.0-20190131015406-c8a261de75c1/go.mod h1:vuvdOZLJuf5HmJAJrKV64MmozrSsk+or0PB5dzdfspg=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
Expand Down
12 changes: 4 additions & 8 deletions go/vt/servenv/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,20 @@ limitations under the License.
package servenv

func getCpuUsage() float64 {
value, err := getCgroupCpu()
if err == nil {
if value, err := getCgroupCpu(); err == nil {
return value
}
value, err = getHostCpuUsage()
if err == nil {
if value, err := getHostCpuUsage(); err == nil {
return value
}
return -1
}

func getMemoryUsage() float64 {
value, err := getCgroupMemory()
if err == nil {
if value, err := getCgroupMemory(); err == nil {
return value
}
value, err = getHostMemoryUsage()
if err == nil {
if value, err := getHostMemoryUsage(); err == nil {
return value
}
return -1
Expand Down
119 changes: 38 additions & 81 deletions go/vt/servenv/metrics_cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,86 +23,63 @@ import (
"fmt"
"math"
"runtime"
"sync"
"time"

"github.com/containerd/cgroups"
"github.com/containerd/cgroups/v3/cgroup1"
"github.com/containerd/cgroups/v3/cgroup2"
"github.com/shirou/gopsutil/v4/mem"

"vitess.io/vitess/go/vt/log"
)

var (
cgroup2Manager *cgroup2.Manager
cgroup1Manager cgroup1.Cgroup
lastCpu uint64
lastTime time.Time
once sync.Once
cgroupManager *cgroup2.Manager
lastCpu uint64
lastTime time.Time
errCgroupMetricsNotAvailable = fmt.Errorf("cgroup metrics are not available")
)

func init() {
if cgroups.Mode() == cgroups.Unified {
manager, err := getCgroup2()
if err != nil {
log.Errorf("Failed to init cgroup2 manager: %v", err)
}
cgroup2Manager = manager
lastCpu, err = getCgroup2CpuUsage()
if err != nil {
log.Errorf("Failed to init cgroup2 cpu %v", err)
}
} else {
cgroup, err := getCgroup1()
if err != nil {
log.Errorf("Failed to init cgroup1 manager: %v", err)
}
cgroup1Manager = cgroup
lastCpu, err = getCgroup1CpuUsage()
if err != nil {
log.Errorf("Failed to init cgroup1 cpu %v", err)
}
func setup() {
if cgroups.Mode() != cgroups.Unified {
log.Warning("cgroup metrics are only supported with cgroup v2, will use host metrics")
return
}
lastTime = time.Now()
}

func isCgroupV2() bool {
return cgroups.Mode() == cgroups.Unified
}

func getCgroup1() (cgroup1.Cgroup, error) {
path := cgroup1.NestedPath("")
cgroup, err := cgroup1.Load(path)
manager, err := getCgroupManager()
if err != nil {
log.Warningf("Failed to init cgroup manager for metrics, will use host metrics: %v", err)
}
cgroupManager = manager
lastCpu, err = getCurrentCgroupCpuUsage()
if err != nil {
return nil, fmt.Errorf("cgroup1 manager is nil")
log.Warningf("Failed to get initial cgroup CPU usage: %v", err)
}
return cgroup, nil
lastTime = time.Now()
}

func getCgroup2() (*cgroup2.Manager, error) {
func getCgroupManager() (*cgroup2.Manager, error) {
path, err := cgroup2.NestedGroupPath("")
if err != nil {
return nil, fmt.Errorf("failed to load cgroup2 manager: %w", err)
return nil, fmt.Errorf("failed to build nested cgroup paths: %w", err)
}
cgroupManager, err := cgroup2.Load(path)
if err != nil {
return nil, fmt.Errorf("cgroup2 manager is nil")
return nil, fmt.Errorf("failed to load cgroup manager: %w", err)
}
return cgroupManager, nil
}

func getCgroupCpuUsage() (float64, error) {
once.Do(setup)
var (
currentUsage uint64
err error
)
currentTime := time.Now()
if isCgroupV2() {
currentUsage, err = getCgroup2CpuUsage()
} else {
currentUsage, err = getCgroup1CpuUsage()
}
currentUsage, err = getCurrentCgroupCpuUsage()
if err != nil {
return -1, fmt.Errorf("Could not read cpu usage")
return -1, fmt.Errorf("failed to read current cgroup CPU usage: %w", err)
}
duration := currentTime.Sub(lastTime)
usage, err := getCpuUsageFromSamples(lastCpu, currentUsage, duration)
Expand All @@ -114,27 +91,13 @@ func getCgroupCpuUsage() (float64, error) {
return usage, nil
}

func getCgroupMemoryUsage() (float64, error) {
if isCgroupV2() {
return getCgroup2MemoryUsage()
} else {
return getCgroup1MemoryUsage()
func getCurrentCgroupCpuUsage() (uint64, error) {
if cgroupManager == nil {
return 0, errCgroupMetricsNotAvailable
}
}

func getCgroup1CpuUsage() (uint64, error) {
stat1, err := cgroup1Manager.Stat()
stat1, err := cgroupManager.Stat()
if err != nil {
return 0, fmt.Errorf("failed to get initial CPU stat: %w", err)
}
currentUsage := stat1.CPU.Usage.Total
return currentUsage, nil
}

func getCgroup2CpuUsage() (uint64, error) {
stat1, err := cgroup2Manager.Stat()
if err != nil {
return 0, fmt.Errorf("failed to get initial CPU stat: %w", err)
return 0, fmt.Errorf("failed to get initial cgroup CPU stats: %w", err)
}
currentUsage := stat1.CPU.UsageUsec
return currentUsage, nil
Expand All @@ -154,20 +117,14 @@ func getCpuUsageFromSamples(usage1 uint64, usage2 uint64, interval time.Duration
return cpuUsage, nil
}

func getCgroup1MemoryUsage() (float64, error) {
stats, err := cgroup1Manager.Stat()
if err != nil {
return -1, fmt.Errorf("failed to get cgroup2 stats: %w", err)
func getCgroupMemoryUsage() (float64, error) {
once.Do(setup)
if cgroupManager == nil {
return -1, errCgroupMetricsNotAvailable
}
usage := stats.Memory.Usage.Usage
limit := stats.Memory.Usage.Limit
return computeMemoryUsage(usage, limit)
}

func getCgroup2MemoryUsage() (float64, error) {
stats, err := cgroup2Manager.Stat()
stats, err := cgroupManager.Stat()
if err != nil {
return -1, fmt.Errorf("failed to get cgroup2 stats: %w", err)
return -1, fmt.Errorf("failed to get cgroup stats: %w", err)
}
usage := stats.Memory.Usage
limit := stats.Memory.UsageLimit
Expand All @@ -176,15 +133,15 @@ func getCgroup2MemoryUsage() (float64, error) {

func computeMemoryUsage(usage uint64, limit uint64) (float64, error) {
if usage == 0 || usage == math.MaxUint64 {
return -1, fmt.Errorf("Failed to find memory usage with invalid value: %d", usage)
return -1, fmt.Errorf("invalid memory usage value: %d", usage)
}
if limit == 0 {
return -1, fmt.Errorf("Failed to compute memory usage with invalid limit: %d", limit)
return -1, fmt.Errorf("invalid memory limit: %d", limit)
}
if limit == math.MaxUint64 {
vmem, err := mem.VirtualMemory()
if err != nil {
return -1, fmt.Errorf("Failed to fall back to system max memory: %w", err)
return -1, fmt.Errorf("failed to get virtual memory stats: %w", err)
}
limit = vmem.Total
}
Expand Down
24 changes: 24 additions & 0 deletions go/vt/servenv/metrics_cgroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ package servenv

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestGetCGroupCpuUsageMetrics(t *testing.T) {
Expand All @@ -35,3 +37,25 @@ func TestGetCgroupMemoryUsageMetrics(t *testing.T) {
validateMem(t, mem, err)
t.Logf("mem %.5f", mem)
}

func TestErrHandlingWithCgroups(t *testing.T) {
origCgroupManager := cgroupManager
defer func() {
cgroupManager = origCgroupManager
}()

cpu, err := getCgroupCpuUsage()
validateCpu(t, cpu, err)
mem, err := getCgroupMemoryUsage()
validateMem(t, mem, err)

cgroupManager = nil
require.Nil(t, cgroupManager)

cpu, err = getCgroupCpuUsage()
require.ErrorContains(t, err, errCgroupMetricsNotAvailable.Error())
require.Equal(t, int(cpu), -1)
mem, err = getCgroupMemoryUsage()
require.ErrorContains(t, err, errCgroupMetricsNotAvailable.Error())
require.Equal(t, int(mem), -1)
}
4 changes: 2 additions & 2 deletions go/vt/servenv/metrics_nonlinux.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ import (
)

func getCgroupCpu() (float64, error) {
return -1, fmt.Errorf("Cgroup not supported on nonlinux platform")
return -1, fmt.Errorf("cgroups not supported on nonlinux platforms")
}

func getCgroupMemory() (float64, error) {
return -1, fmt.Errorf("Cgroup not supported on nonlinux platform")
return -1, fmt.Errorf("cgroups not supported on nonlinux platforms")
}
Loading