Skip to content

Commit 0c080a0

Browse files
authored
across: Implement metrics API (kadalu#643)
Updates: kadalu#8 Signed-off-by: Shree Vatsa N <[email protected]>
1 parent cfe946d commit 0c080a0

25 files changed

+659
-138
lines changed

csi/controllerserver.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ def ListVolumes(self, request, context):
517517
"volume_id": value.get("name"),
518518
"capacity_bytes": value.get("size"),
519519
}
520-
} for value in pvcs]
520+
} for value in pvcs if value is not None]
521521

522522
return csi_pb2.ListVolumesResponse(entries=entries,
523523
next_token=next_token)

csi/exporter.py

+131-119
Original file line numberDiff line numberDiff line change
@@ -1,128 +1,140 @@
11
import os
2-
import pathlib
3-
import time
42
import logging
5-
6-
from prometheus_client.core import GaugeMetricFamily, \
7-
CounterMetricFamily, REGISTRY
8-
from prometheus_client import start_http_server
9-
10-
from volumeutils import (HOSTVOL_MOUNTDIR, PV_TYPE_SUBVOL,
11-
yield_pvc_from_mntdir)
3+
import uvicorn
4+
from fastapi import FastAPI
5+
from volumeutils import HOSTVOL_MOUNTDIR, yield_pvc_from_mntdir
126
from kadalulib import logging_setup, logf
137

14-
15-
class CsiMetricsCollector(object):
16-
def collect(self):
17-
# TODO: Add more labels
18-
capacity_labels = ['storage_name']
19-
capacity_bytes = GaugeMetricFamily(
20-
'kadalu_storage_capacity_bytes',
21-
'Kadalu Storage Capacity',
22-
labels=capacity_labels
23-
)
24-
capacity_used_bytes = GaugeMetricFamily(
25-
'kadalu_storage_capacity_used_bytes',
26-
'Kadalu Storage Used Capacity',
27-
labels=capacity_labels
28-
)
29-
capacity_free_bytes = GaugeMetricFamily(
30-
'kadalu_storage_capacity_free_bytes',
31-
'Kadalu Storage Free Capacity',
32-
labels=capacity_labels
33-
)
34-
inodes_count = CounterMetricFamily(
35-
'kadalu_storage_inodes_count',
36-
'Kadalu Storage Inodes Count',
37-
labels=capacity_labels
38-
)
39-
inodes_used_count = CounterMetricFamily(
40-
'kadalu_storage_inodes_used_count',
41-
'Kadalu Storage Inodes used Count',
42-
labels=capacity_labels
43-
)
44-
inodes_free_count = CounterMetricFamily(
45-
'kadalu_storage_inodes_free_count',
46-
'Kadalu Storage Inodes free Count',
47-
labels=capacity_labels
48-
)
49-
pv_capacity_bytes = GaugeMetricFamily(
50-
'kadalu_storage_pv_capacity_bytes',
51-
'Kadalu Storage PV Capacity',
52-
labels=capacity_labels+["pv"]
53-
)
54-
pv_capacity_used_bytes = GaugeMetricFamily(
55-
'kadalu_storage_pv_capacity_used_bytes',
56-
'Kadalu Storage PV Used Capacity',
57-
labels=capacity_labels+["pv"]
58-
)
59-
pv_capacity_free_bytes = GaugeMetricFamily(
60-
'kadalu_storage_pv_capacity_free_bytes',
61-
'Kadalu Storage PV Free Capacity',
62-
labels=capacity_labels+["pv"]
63-
)
64-
65-
for dirname in os.listdir(HOSTVOL_MOUNTDIR):
66-
labels = [dirname] # TODO: Add more labels
67-
pth = os.path.join(HOSTVOL_MOUNTDIR, dirname)
68-
if os.path.ismount(pth):
69-
stat = os.statvfs(pth)
70-
71-
# Capacity
72-
total = stat.f_bsize * stat.f_blocks
73-
free = stat.f_bsize * stat.f_bavail
74-
used = total - free
75-
capacity_bytes.add_metric(labels, total)
76-
capacity_free_bytes.add_metric(labels, free)
77-
capacity_used_bytes.add_metric(labels, used)
78-
79-
# Inodes
80-
total = stat.f_files
81-
free = stat.f_favail
82-
used = total - free
83-
inodes_count.add_metric(labels, total)
84-
inodes_free_count.add_metric(labels, free)
85-
inodes_used_count.add_metric(labels, used)
86-
87-
# Gathers capacity metrics for each subvol
88-
for pvc in yield_pvc_from_mntdir(os.path.join(pth, "info")):
89-
if pvc is None:
90-
continue
91-
pvcpath_full = os.path.join(pth, pvc.get("path_prefix"),
92-
pvc.get("name"))
93-
pvclabels = labels + [pvc.get("name")]
94-
95-
stat = os.statvfs(pvcpath_full)
96-
97-
# Capacity
98-
total = stat.f_bsize * stat.f_blocks
99-
free = stat.f_bsize * stat.f_bavail
100-
used = total - free
101-
pv_capacity_bytes.add_metric(pvclabels, total)
102-
pv_capacity_free_bytes.add_metric(pvclabels, free)
103-
pv_capacity_used_bytes.add_metric(pvclabels, used)
104-
105-
yield capacity_bytes
106-
yield capacity_free_bytes
107-
yield capacity_used_bytes
108-
yield inodes_count
109-
yield inodes_free_count
110-
yield inodes_used_count
111-
yield pv_capacity_bytes
112-
yield pv_capacity_used_bytes
113-
yield pv_capacity_free_bytes
114-
115-
116-
REGISTRY.register(CsiMetricsCollector())
8+
app = FastAPI()
9+
10+
def _read_int_file(path, default=0):
    """Return the integer stored in the file at `path`, or `default`.

    The pod usage counters are read from single-value text files. If the
    file cannot be opened or does not contain an integer, a warning is
    logged and `default` is returned so that the rest of the metrics can
    still be served.
    """
    try:
        with open(path, 'r') as usage_fd:
            return int(usage_fd.read().strip())
    except (OSError, ValueError) as err:
        logging.warning(logf(
            "Unable to read usage counter. Reporting default value.",
            path=path,
            default=default,
            error=str(err)
        ))
        return default


def _usage_from_statvfs(path):
    """Return capacity and inode usage reported by `os.statvfs(path)`.

    Returns a tuple `(total_bytes, free_bytes, used_bytes, total_inodes,
    free_inodes, used_inodes)`. Raises `OSError` when `path` cannot be
    queried (for example, when it no longer exists).
    """
    stat = os.statvfs(path)

    # NOTE(review): POSIX describes f_blocks/f_bavail in units of f_frsize;
    # f_bsize is kept here exactly as before so reported values do not
    # change. Confirm which unit the mounted filesystem actually uses.
    total_bytes = stat.f_bsize * stat.f_blocks
    free_bytes = stat.f_bsize * stat.f_bavail

    total_inodes = stat.f_files
    free_inodes = stat.f_favail

    return (total_bytes, free_bytes, total_bytes - free_bytes,
            total_inodes, free_inodes, total_inodes - free_inodes)


@app.get("/_api/metrics")
def metrics():
    """
    Gather pod, storage and PVC metrics and return them as a dict.

    The returned dict has two keys:

    - "pod": memory and CPU usage counters of the pod, plus "pod_name"
      when the CSI_ROLE environment variable is "nodeplugin".
    - "storages": one entry per mounted directory under HOSTVOL_MOUNTDIR,
      holding capacity and inode usage and a "pvc" list with the same
      figures for every PVC found under the storage's "info" directory.

    Served at '/_api/metrics'.
    """

    # NOTE(review): these look like cgroup v1 paths and are presumably
    # absent on cgroup v2 hosts - confirm. When a file is unreadable the
    # counter falls back to 0 instead of failing the whole request.
    memory_usage_file_path = '/sys/fs/cgroup/memory/memory.usage_in_bytes'
    cpu_usage_file_path = '/sys/fs/cgroup/cpu/cpuacct.usage'

    data = {
        "pod": {
            "memory_usage_in_bytes": _read_int_file(memory_usage_file_path),
            "cpu_usage_in_nanoseconds": _read_int_file(cpu_usage_file_path)
        },
        "storages": []
    }

    if os.environ.get("CSI_ROLE", "-") == "nodeplugin":
        pod_name_path = '/etc/hostname'
        with open(pod_name_path, 'r') as pod_fd:
            data["pod"]["pod_name"] = pod_fd.read().strip()

    # Handle condition for no storage & PVC,
    # sometimes storage name is not shown at /mnt until server is mounted.
    # The directory is listed once so the emptiness check and the loop
    # below work on the same snapshot.
    storage_names = os.listdir(HOSTVOL_MOUNTDIR)
    if not storage_names:
        logging.debug(logf(
            "No storage-pool found! Try again by creating a storage.",
            HOSTVOL_MOUNTDIR=HOSTVOL_MOUNTDIR
        ))
        return data

    # Gathers metrics for each storage
    for dirname in storage_names:
        storage_path = os.path.join(HOSTVOL_MOUNTDIR, dirname)

        if not os.path.ismount(storage_path):
            continue

        (total_capacity_bytes, free_capacity_bytes, used_capacity_bytes,
         total_inodes, free_inodes, used_inodes) = \
            _usage_from_statvfs(storage_path)

        storage = {
            "name": dirname,
            "total_capacity_bytes": total_capacity_bytes,
            "free_capacity_bytes": free_capacity_bytes,
            "used_capacity_bytes": used_capacity_bytes,
            "total_inodes": total_inodes,
            "free_inodes": free_inodes,
            "used_inodes": used_inodes,
            "pvc": []
        }
        data["storages"].append(storage)

        storage_info_path = os.path.join(storage_path, "info")
        if not os.path.exists(storage_info_path):
            logging.warning(logf(
                "No PVC found. Sending only storage metrics",
                storage=dirname
            ))
            # Move on to the next storage. Returning here would drop the
            # metrics of every storage that has not been visited yet.
            continue

        # Gathers metrics for each subvol[PVC]
        for pvc_info in yield_pvc_from_mntdir(storage_info_path):

            # Handle condition when PVC is created and then deleted,
            # Leaving an empty leaf directory with path prefix.
            if pvc_info is None:
                logging.warning(logf(
                    "PVC JSON file not found. PVC must have been deleted. Trying again!"
                ))
                # Skip loop for now and look for any new possible healthy PVC
                continue

            pvcname = pvc_info.get("name")
            pvcpath = os.path.join(
                storage_path, pvc_info.get("path_prefix"), pvcname)

            try:
                (total_pvc_capacity_bytes, free_pvc_capacity_bytes,
                 used_pvc_capacity_bytes, total_pvc_inodes,
                 free_pvc_inodes, used_pvc_inodes) = \
                    _usage_from_statvfs(pvcpath)
            except OSError as err:
                # The PVC directory can disappear between reading its info
                # file and querying it. Skip it instead of failing the
                # whole request.
                logging.warning(logf(
                    "Unable to collect PVC metrics. Skipping PVC.",
                    pvc=pvcname,
                    path=pvcpath,
                    error=str(err)
                ))
                continue

            storage["pvc"].append({
                "pvc_name": pvcname,
                "total_pvc_capacity_bytes": total_pvc_capacity_bytes,
                "free_pvc_capacity_bytes": free_pvc_capacity_bytes,
                "used_pvc_capacity_bytes": used_pvc_capacity_bytes,
                "total_pvc_inodes": total_pvc_inodes,
                "free_pvc_inodes": free_pvc_inodes,
                "used_pvc_inodes": used_pvc_inodes
            })

    return data
117131

118132

119133
if __name__ == "__main__":
120-
logging_setup()
121134

122-
start_http_server(8000)
123-
logging.info(
124-
logf("Started Kadalu Storage CSI Metrics exporter.", port=8000)
125-
)
135+
logging_setup()
136+
logging.info(logf(
137+
"Started metrics exporter process at port 8050"
138+
))
126139

127-
while True:
128-
time.sleep(5)
140+
uvicorn.run("exporter:app", host="0.0.0.0", port=8050, log_level="info")

csi/volumeutils.py

+4
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,7 @@ def generate_client_volfile(volname):
908908
Template(content).stream(**data).dump(client_volfile)
909909
return True
910910

911+
911912
def send_signal_to_process(volname, out, sig):
912913
"""Sends the signal to one of the process"""
913914

@@ -1149,6 +1150,7 @@ def mount_glusterfs_with_host(volname, mountpoint, hosts, options=None, is_clien
11491150

11501151
return
11511152

1153+
11521154
def check_external_volume(pv_request, host_volumes):
11531155
"""Mount hosting volume"""
11541156
# Assumption is, this has to have 'hostvol_type' as External.
@@ -1242,6 +1244,7 @@ def yield_pvc_from_mntdir(mntdir):
12421244
yield None
12431245
for child in os.listdir(mntdir):
12441246
name = os.path.join(mntdir, child)
1247+
12451248
if os.path.isdir(name) and len(os.listdir(name)):
12461249
yield from yield_pvc_from_mntdir(name)
12471250
elif name.endswith('json'):
@@ -1290,6 +1293,7 @@ def wrap_pvc(pvc_gen):
12901293
except StopIteration as errmsg:
12911294
return errmsg
12921295

1296+
12931297
def yield_list_of_pvcs(max_entries=0):
12941298
"""Yields list of PVCs limited at 'max_entries'"""
12951299
# List of dicts containing data of PVC from info_file (with extra keys,

helm/kadalu/charts/operator/templates/deployment.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ spec:
1616
metadata:
1717
labels:
1818
name: kadalu
19+
annotations:
20+
prometheus.io/scrape: "true"
21+
prometheus.io/port: "8050"
1922
spec:
2023
serviceAccountName: kadalu-operator
2124
containers:

manifests/kadalu-operator-microk8s.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,9 @@ spec:
282282
metadata:
283283
labels:
284284
name: kadalu
285+
annotations:
286+
prometheus.io/scrape: "true"
287+
prometheus.io/port: "8050"
285288
spec:
286289
serviceAccountName: kadalu-operator
287290
containers:

manifests/kadalu-operator-openshift.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,9 @@ spec:
313313
metadata:
314314
labels:
315315
name: kadalu
316+
annotations:
317+
prometheus.io/scrape: "true"
318+
prometheus.io/port: "8050"
316319
spec:
317320
serviceAccountName: kadalu-operator
318321
containers:

manifests/kadalu-operator-rke.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,9 @@ spec:
282282
metadata:
283283
labels:
284284
name: kadalu
285+
annotations:
286+
prometheus.io/scrape: "true"
287+
prometheus.io/port: "8050"
285288
spec:
286289
serviceAccountName: kadalu-operator
287290
containers:

manifests/kadalu-operator.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,9 @@ spec:
282282
metadata:
283283
labels:
284284
name: kadalu
285+
annotations:
286+
prometheus.io/scrape: "true"
287+
prometheus.io/port: "8050"
285288
spec:
286289
serviceAccountName: kadalu-operator
287290
containers:

operator/Dockerfile

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ COPY lib/kadalulib.py /kadalu/kadalulib.py
4343
COPY cli/kubectl_kadalu/utils.py /kadalu/utils.py
4444
COPY operator/main.py /kadalu/
4545
COPY operator/start.py /kadalu/
46+
COPY operator/metrics.py /kadalu/
47+
COPY operator/exporter.py /kadalu/
4648
COPY cli/build/kubectl-kadalu /usr/bin/kubectl-kadalu
4749
COPY lib/startup.sh /kadalu/startup.sh
4850

0 commit comments

Comments
 (0)