diff --git a/info/v1/machine.go b/info/v1/machine.go index 6df86a266e..0aa742dd86 100644 --- a/info/v1/machine.go +++ b/info/v1/machine.go @@ -123,6 +123,9 @@ type HugePagesInfo struct { // number of huge pages NumPages uint64 `json:"num_pages"` + + // number of free huge pages (nil if unavailable) + FreePages *uint64 `json:"free_pages,omitempty"` } type DiskInfo struct { diff --git a/metrics/prometheus_fake.go b/metrics/prometheus_fake.go index ab48cce860..4469bb4cb8 100644 --- a/metrics/prometheus_fake.go +++ b/metrics/prometheus_fake.go @@ -58,12 +58,14 @@ func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, erro Memory: 33604804608, HugePages: []info.HugePagesInfo{ { - PageSize: uint64(1048576), - NumPages: uint64(0), + PageSize: uint64(1048576), + NumPages: uint64(0), + FreePages: ptrUint64(0), }, { - PageSize: uint64(2048), - NumPages: uint64(0), + PageSize: uint64(2048), + NumPages: uint64(0), + FreePages: ptrUint64(0), }, }, Cores: []info.Core{ @@ -163,12 +165,14 @@ func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, erro Memory: 33604804606, HugePages: []info.HugePagesInfo{ { - PageSize: uint64(1048576), - NumPages: uint64(2), + PageSize: uint64(1048576), + NumPages: uint64(2), + FreePages: ptrUint64(1), }, { - PageSize: uint64(2048), - NumPages: uint64(4), + PageSize: uint64(2048), + NumPages: uint64(4), + FreePages: ptrUint64(3), }, }, Cores: []info.Core{ @@ -811,6 +815,10 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req }, nil } +func ptrUint64(v uint64) *uint64 { + return &v +} + type erroringSubcontainersInfoProvider struct { successfulProvider testSubcontainersInfoProvider shouldFail bool diff --git a/metrics/prometheus_machine.go b/metrics/prometheus_machine.go index 2c3e96a12d..4de42e924d 100644 --- a/metrics/prometheus_machine.go +++ b/metrics/prometheus_machine.go @@ -216,6 +216,15 @@ func NewPrometheusMachineCollector(i infoProvider, includedMetrics container.Met return getHugePagesCount(machineInfo) }, }, + { + name: "machine_node_hugepages_free", + help: "Number of free hugepages on NUMA node.", + valueType: prometheus.GaugeValue, + extraLabels: []string{prometheusNodeLabelName, prometheusPageSizeLabelName}, + getValues: func(machineInfo *info.MachineInfo) metricValues { + return getHugePagesFree(machineInfo) + }, + }, { name: "machine_node_distance", help: "Distance between NUMA node and target NUMA node.", @@ -353,6 +362,25 @@ func getHugePagesCount(machineInfo *info.MachineInfo) metricValues { return mValues } +func getHugePagesFree(machineInfo *info.MachineInfo) metricValues { + mValues := make(metricValues, 0) + for _, node := range machineInfo.Topology { + nodeID := strconv.Itoa(node.Id) + + for _, hugePage := range node.HugePages { + if hugePage.FreePages != nil { + mValues = append(mValues, + metricValue{ + value: float64(*hugePage.FreePages), + labels: []string{nodeID, strconv.FormatUint(hugePage.PageSize, 10)}, + timestamp: machineInfo.Timestamp, + }) + } + } + } + return mValues +} + func getCaches(machineInfo *info.MachineInfo) metricValues { mValues := make(metricValues, 0) for _, node := range machineInfo.Topology { diff --git a/metrics/prometheus_machine_test.go b/metrics/prometheus_machine_test.go index bb0e8d7827..9c33fbebad 100644 --- a/metrics/prometheus_machine_test.go +++ b/metrics/prometheus_machine_test.go @@ -188,6 +188,22 @@ func TestGetHugePagesCount(t *testing.T) { assertMetricValues(t, expectedMetricVals, metricVals, "Unexpected information about Node memory") } +func TestGetHugePagesFree(t *testing.T) { + machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo() + assert.Nil(t, err) + + metricVals := getHugePagesFree(machineInfo) + + assert.Equal(t, 4, len(metricVals)) + expectedMetricVals := []metricValue{ + {value: 0, labels: []string{"0", "1048576"}, timestamp: time.Unix(1395066363, 0)}, + {value: 0, labels: []string{"0", "2048"}, timestamp: time.Unix(1395066363, 0)}, + {value: 1, labels: []string{"1", "1048576"}, timestamp: time.Unix(1395066363, 0)}, + {value: 3, labels: []string{"1", "2048"}, timestamp: time.Unix(1395066363, 0)}, + } + assertMetricValues(t, expectedMetricVals, metricVals, "Unexpected information about free hugepages") +} + func TestGetDistance(t *testing.T) { machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo() assert.Nil(t, err) diff --git a/metrics/testdata/prometheus_machine_metrics b/metrics/testdata/prometheus_machine_metrics index 60043e5ea0..d5a8cee817 100644 --- a/metrics/testdata/prometheus_machine_metrics +++ b/metrics/testdata/prometheus_machine_metrics @@ -63,6 +63,12 @@ machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test" machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",page_size="2048",system_uuid="system-uuid-test"} 0 1395066363000 machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",page_size="1048576",system_uuid="system-uuid-test"} 2 1395066363000 machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",page_size="2048",system_uuid="system-uuid-test"} 4 1395066363000 +# HELP machine_node_hugepages_free Number of free hugepages on NUMA node. +# TYPE machine_node_hugepages_free gauge +machine_node_hugepages_free{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",page_size="1048576",system_uuid="system-uuid-test"} 0 1395066363000 +machine_node_hugepages_free{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",page_size="2048",system_uuid="system-uuid-test"} 0 1395066363000 +machine_node_hugepages_free{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",page_size="1048576",system_uuid="system-uuid-test"} 1 1395066363000 +machine_node_hugepages_free{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",page_size="2048",system_uuid="system-uuid-test"} 3 1395066363000 # HELP machine_node_memory_capacity_bytes Amount of memory assigned to NUMA node. # TYPE machine_node_memory_capacity_bytes gauge machine_node_memory_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test"} 3.3604804608e+10 1395066363000 diff --git a/utils/sysfs/fakesysfs/fake.go b/utils/sysfs/fakesysfs/fake.go index 3ef4ca6329..08585766a6 100644 --- a/utils/sysfs/fakesysfs/fake.go +++ b/utils/sysfs/fakesysfs/fake.go @@ -82,6 +82,9 @@ type FakeSysFs struct { hugePagesNr map[string]string hugePagesNrErr error + hugePagesFree map[string]string + hugePagesFreeErr error + distances map[string]string distancesErr error @@ -125,6 +128,11 @@ func (fs *FakeSysFs) GetHugePagesNr(hugepagesDirectory string, hugePageName stri return fs.hugePagesNr[hugePageFile], fs.hugePagesNrErr } +func (fs *FakeSysFs) GetHugePagesFree(hugepagesDirectory string, hugePageName string) (string, error) { + hugePageFile := fmt.Sprintf("%s%s/%s", hugepagesDirectory, hugePageName, sysfs.HugePagesFreeFile) + return fs.hugePagesFree[hugePageFile], fs.hugePagesFreeErr +} + func (fs *FakeSysFs) GetBlockDevices() ([]os.FileInfo, error) { fs.info.EntryName = "sda" return []os.FileInfo{&fs.info}, nil @@ -224,6 +232,11 @@ func (fs *FakeSysFs) SetHugePagesNr(hugePagesNr map[string]string, err error) { fs.hugePagesNrErr = err } +func (fs *FakeSysFs) SetHugePagesFree(hugePagesFree map[string]string, err error) { + fs.hugePagesFree = hugePagesFree + fs.hugePagesFreeErr = err +} + func (fs *FakeSysFs) SetEntryName(name string) { fs.info.EntryName = name } diff --git a/utils/sysfs/sysfs.go b/utils/sysfs/sysfs.go index a42876e80a..725ebf0b7a 100644 --- a/utils/sysfs/sysfs.go +++ b/utils/sysfs/sysfs.go @@ -67,6 +67,8 @@ const ( //HugePagesNrFile name of nr_hugepages file in sysfs HugePagesNrFile = "nr_hugepages" + //HugePagesFreeFile name of free_hugepages file in sysfs + HugePagesFreeFile = "free_hugepages" ) var ( @@ -106,6 +108,8 @@ type SysFs interface { GetHugePagesInfo(hugePagesDirectory string) ([]os.FileInfo, error) // Get hugepage_nr from specified directory GetHugePagesNr(hugePagesDirectory string, hugePageName string) (string, error) + // Get free_hugepages from specified directory + GetHugePagesFree(hugePagesDirectory string, hugePageName string) (string, error) // Get directory information for available block devices. GetBlockDevices() ([]os.FileInfo, error) // Get Size of a given block device. @@ -230,6 +234,15 @@ func (fs *realSysFs) GetHugePagesNr(hugepagesDirectory string, hugePageName stri return strings.TrimSpace(string(hugePageFile)), err } +func (fs *realSysFs) GetHugePagesFree(hugepagesDirectory string, hugePageName string) (string, error) { + hugePageFilePath := fmt.Sprintf("%s%s/%s", hugepagesDirectory, hugePageName, HugePagesFreeFile) + hugePageFile, err := os.ReadFile(hugePageFilePath) + if err != nil { + return "", err + } + return strings.TrimSpace(string(hugePageFile)), err +} + func (fs *realSysFs) GetBlockDevices() ([]os.FileInfo, error) { dirs, err := os.ReadDir(blockDir) if err != nil { diff --git a/utils/sysfs/sysfs_test.go b/utils/sysfs/sysfs_test.go index 22874e7059..864ddee0eb 100644 --- a/utils/sysfs/sysfs_test.go +++ b/utils/sysfs/sysfs_test.go @@ -124,6 +124,23 @@ func TestGetHugePagesNrWhenFileIsMissing(t *testing.T) { assert.Equal(t, "", rawHugePageNr) } +func TestGetHugePagesFree(t *testing.T) { + sysFs := NewRealSysFs() + rawHugePageFree, err := sysFs.GetHugePagesFree("./testdata/node0/hugepages/", "hugepages-1048576kB") + assert.Nil(t, err) + + hugePageFree, err := strconv.Atoi(rawHugePageFree) + assert.Nil(t, err) + assert.Equal(t, 1, hugePageFree) +} + +func TestGetHugePagesFreeWhenFileIsMissing(t *testing.T) { + sysFs := NewRealSysFs() + rawHugePageFree, err := sysFs.GetHugePagesFree("./testdata/node1/hugepages/", "hugepages-1048576kB") + assert.NotNil(t, err) + assert.Equal(t, "", rawHugePageFree) +} + func TestIsCPUOnline(t *testing.T) { sysFs := &realSysFs{ cpuPath: "./testdata_epyc7402_nohyperthreading", diff --git a/utils/sysfs/testdata/node0/hugepages/hugepages-1048576kB/free_hugepages b/utils/sysfs/testdata/node0/hugepages/hugepages-1048576kB/free_hugepages new file mode 100644 index 0000000000..d00491fd7e --- /dev/null +++ b/utils/sysfs/testdata/node0/hugepages/hugepages-1048576kB/free_hugepages @@ -0,0 +1 @@ +1 diff --git a/utils/sysfs/testdata/node0/hugepages/hugepages-2048kB/free_hugepages b/utils/sysfs/testdata/node0/hugepages/hugepages-2048kB/free_hugepages new file mode 100644 index 0000000000..573541ac97 --- /dev/null +++ b/utils/sysfs/testdata/node0/hugepages/hugepages-2048kB/free_hugepages @@ -0,0 +1 @@ +0 diff --git a/utils/sysinfo/sysinfo.go b/utils/sysinfo/sysinfo.go index d2e8b6c943..3097a0330d 100644 --- a/utils/sysinfo/sysinfo.go +++ b/utils/sysinfo/sysinfo.go @@ -192,9 +192,21 @@ func GetHugePagesInfo(sysFs sysfs.SysFs, hugepagesDirectory string) ([]info.Huge return hugePagesInfo, fmt.Errorf("could not parse file nr_hugepage for %s, contents %q", st.Name(), string(val)) } + // Read free_hugepages (optional - nil if unavailable) + var freePages *uint64 + freeVal, err := sysFs.GetHugePagesFree(hugepagesDirectory, st.Name()) + if err == nil && freeVal != "" { + var fp uint64 + n, err = fmt.Sscanf(string(freeVal), "%d", &fp) + if err == nil && n == 1 { + freePages = &fp + } + } + hugePagesInfo = append(hugePagesInfo, info.HugePagesInfo{ - NumPages: numPages, - PageSize: pageSize, + NumPages: numPages, + PageSize: pageSize, + FreePages: freePages, }) } return hugePagesInfo, nil diff --git a/utils/sysinfo/sysinfo_test.go b/utils/sysinfo/sysinfo_test.go index 0ff97443ea..ddc5473b4c 100644 --- a/utils/sysinfo/sysinfo_test.go +++ b/utils/sysinfo/sysinfo_test.go @@ -42,9 +42,25 @@ func TestGetHugePagesInfo(t *testing.T) { } fakeSys.SetHugePagesNr(hugePageNr, nil) + hugePageFree := map[string]string{ + "/fakeSysfs/devices/system/node/node0/hugepages/hugepages-2048kB/free_hugepages": "1", + "/fakeSysfs/devices/system/node/node0/hugepages/hugepages-1048576kB/free_hugepages": "0", + } + fakeSys.SetHugePagesFree(hugePageFree, nil) + hugePagesInfo, err := GetHugePagesInfo(&fakeSys, "/fakeSysfs/devices/system/node/node0/hugepages/") assert.Nil(t, err) assert.Equal(t, 2, len(hugePagesInfo)) + // Verify FreePages is correctly read + for _, hp := range hugePagesInfo { + if hp.PageSize == 2048 { + assert.NotNil(t, hp.FreePages) + assert.Equal(t, uint64(1), *hp.FreePages) + } else if hp.PageSize == 1048576 { + assert.NotNil(t, hp.FreePages) + assert.Equal(t, uint64(0), *hp.FreePages) + } + } } func TestGetHugePagesInfoWithHugePagesDirectory(t *testing.T) {