Skip to content

Commit 2815fef

Browse files
committed
Detect if the host needs to be rebooted
And expose it via Prometheus:

    # HELP comin_host_info Info of the host.
    # TYPE comin_host_info gauge
    comin_host_info{need_to_reboot="1"} 1
1 parent aab5ac5 commit 2815fef

File tree

3 files changed

+53
-1
lines changed

3 files changed

+53
-1
lines changed

internal/manager/manager.go

+6
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ type Manager struct {
4444
// for a first iteration: this needs to be removed
4545
isRunning bool
4646
needToBeRestarted bool
47+
needToReboot bool
4748
cominServiceRestartFunc func() error
4849

4950
evalFunc generation.EvalFunc
@@ -153,6 +154,8 @@ func (m Manager) onDeployment(ctx context.Context, deploymentResult deployment.D
153154
if getsEvicted && evicted.ProfilePath != "" {
154155
profile.RemoveProfilePath(evicted.ProfilePath)
155156
}
157+
m.needToReboot = utils.NeedToReboot()
158+
m.prometheus.SetHostInfo(m.needToReboot)
156159
return m
157160
}
158161

@@ -211,6 +214,9 @@ func (m Manager) Run() {
211214
logrus.Infof(" machineId = %s", m.machineId)
212215
logrus.Infof(" repositoryPath = %s", m.repositoryPath)
213216

217+
m.needToReboot = utils.NeedToReboot()
218+
m.prometheus.SetHostInfo(m.needToReboot)
219+
214220
for {
215221
select {
216222
case <-m.stateRequestCh:

internal/prometheus/prometheus.go

+19-1
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,15 @@ type Prometheus struct {
1212
buildInfo *prometheus.GaugeVec
1313
deploymentInfo *prometheus.GaugeVec
1414
fetchCounter *prometheus.CounterVec
15+
hostInfo *prometheus.GaugeVec
1516
}
1617

1718
func New() Prometheus {
1819
promReg := prometheus.NewRegistry()
1920
buildInfo := prometheus.NewGaugeVec(prometheus.GaugeOpts{
2021
Name: "comin_build_info",
2122
Help: "Build info for comin.",
22-
}, []string{"version"})
23+
}, []string{"version"})
2324
deploymentInfo := prometheus.NewGaugeVec(prometheus.GaugeOpts{
2425
Name: "comin_deployment_info",
2526
Help: "Info of the last deployment.",
@@ -28,14 +29,20 @@ func New() Prometheus {
2829
Name: "comin_fetch_count",
2930
Help: "Number of fetches per status",
3031
}, []string{"remote_name", "status"})
32+
hostInfo := prometheus.NewGaugeVec(prometheus.GaugeOpts{
33+
Name: "comin_host_info",
34+
Help: "Info of the host.",
35+
}, []string{"need_to_reboot"})
3136
promReg.MustRegister(buildInfo)
3237
promReg.MustRegister(deploymentInfo)
3338
promReg.MustRegister(fetchCounter)
39+
promReg.MustRegister(hostInfo)
3440
return Prometheus{
3541
promRegistry: promReg,
3642
buildInfo: buildInfo,
3743
deploymentInfo: deploymentInfo,
3844
fetchCounter: fetchCounter,
45+
hostInfo: hostInfo,
3946
}
4047
}
4148

@@ -60,3 +67,14 @@ func (m Prometheus) SetDeploymentInfo(commitId, status string) {
6067
m.deploymentInfo.Reset()
6168
m.deploymentInfo.With(prometheus.Labels{"commit_id": commitId, "status": status}).Set(1)
6269
}
70+
71+
func (m Prometheus) SetHostInfo(needToReboot bool) {
72+
m.hostInfo.Reset()
73+
var value string
74+
if needToReboot {
75+
value = "1"
76+
} else {
77+
value = "0"
78+
}
79+
m.hostInfo.With(prometheus.Labels{"need_to_reboot": value}).Set(1)
80+
}

internal/utils/reboot.go

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package utils
2+
3+
import (
4+
"os"
5+
6+
"github.com/sirupsen/logrus"
7+
)
8+
9+
// NeedToReboot return true when the current deployed kernel is not
10+
// the booted kernel. Note we should implement something smarter such
11+
// as described in
12+
// https://discourse.nixos.org/t/nixos-needsreboot-determine-if-you-need-to-reboot-your-nixos-machine/40790
13+
func NeedToReboot() (reboot bool) {
14+
current, err := os.Readlink("/run/current-system/kernel")
15+
if err != nil {
16+
logrus.Errorf("Failed to read the symlink /run/current-system/kernel: %s", err)
17+
return
18+
}
19+
booted, err := os.Readlink("/run/booted-system/kernel")
20+
if err != nil {
21+
logrus.Errorf("Failed to read the symlink /run/booted-system/kernel: %s", err)
22+
return
23+
}
24+
if current != booted {
25+
reboot = true
26+
}
27+
return
28+
}

0 commit comments

Comments
 (0)