Skip to content

Commit

Permalink
[horus] Building Self-Healing Logic (#346)
Browse files Browse the repository at this point in the history
fix typo
  • Loading branch information
mfordjody authored Sep 16, 2024
1 parent c10e540 commit 2ceef7a
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 5 deletions.
2 changes: 1 addition & 1 deletion app/horus/basic/config/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@ type SlackConfiguration struct {
type RecoveryConfiguration struct {
DayNumber int `yaml:"dayNumber"`
CheckIntervalSecond int `yaml:"checkIntervalSecond"`
PromQueryTimeSecond int `yaml:"promQueryTimeSecond"`
PromQueryTimeSecond int64 `yaml:"promQueryTimeSecond"`
DingTalk *DingTalkConfiguration `yaml:"dingTalk"`
}
7 changes: 6 additions & 1 deletion app/horus/basic/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,12 @@ func (n *NodeDataInfo) AddOrGet() (int64, error) {

// GetRecoveryNodeDataInfoDate loads the node records that are still candidates
// for a recovery check: rows whose recovery_mark is 0 and whose first_date is
// later than `day` days before the database's current date (CURDATE()).
//
// day is rendered with %d, so only an integer can end up in the SQL text.
//
// On success the matched records are returned with a nil error. If Find
// reports an error, that error is returned together with a nil slice.
func GetRecoveryNodeDataInfoDate(day int) ([]*NodeDataInfo, error) {
	var ndi []*NodeDataInfo
	// A single condition is built; it excludes records already marked as recovered.
	session := db.Where(fmt.Sprintf("recovery_mark = 0 AND first_date > DATE_SUB(CURDATE(), INTERVAL %d DAY)", day))
	if err := session.Find(&ndi); err != nil {
		return nil, err
	}
	// Hand back the rows Find filled in; returning nil here would hide every match.
	return ndi, nil
}

// RecoveryMarker sets the record's RecoveryMark field to 1 and then calls
// Update on the record, returning Update's results unchanged.
func (n *NodeDataInfo) RecoveryMarker() (bool, error) {
	// 1 is the value written to RecoveryMark before the record is updated.
	const recovered = 1
	n.RecoveryMark = recovered

	updated, err := n.Update()
	return updated, err
}
43 changes: 43 additions & 0 deletions app/horus/core/horuser/recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ package horuser

import (
"context"
"fmt"
"github.com/apache/dubbo-kubernetes/app/horus/basic/db"
"github.com/apache/dubbo-kubernetes/app/horus/core/alert"
"github.com/gammazero/workerpool"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
"time"
Expand All @@ -39,4 +42,44 @@ func (h *Horuser) recoveryCheck(ctx context.Context) {
klog.Errorf("recovery check GetRecoveryNodeDataInfoDate zero.")
return
}
wp := workerpool.New(5)
for _, d := range data {
d := d
wp.Submit(func() {
h.recoveryNodes(d)
})

}
wp.StopWait()
}

// recoveryNodes runs the recovery check for one recorded node and, when the
// check passes, makes the node schedulable again.
//
// Steps:
//  1. Look up the Prometheus address configured for the node's cluster; log
//     and stop if it is empty.
//  2. Build the query by formatting n.RecoveryQL with the node name and run it
//     through InstantQuery; stop on error or when the result does not contain
//     exactly two entries.
//  3. Call UnCordon for the node, send the outcome through DingTalk, and call
//     RecoveryMarker on the record. The notification and the marker call are
//     made whether or not UnCordon returned an error.
func (h *Horuser) recoveryNodes(n *db.NodeDataInfo) {
	promAddr := h.cc.PromMultiple[n.ClusterName]
	if promAddr == "" {
		klog.Errorf("recoveryNodes PromMultiple get addr empty.")
		klog.Infof("clusterName:%v nodeName:%v", n.ClusterName, n.NodeName)
		return
	}

	// n.RecoveryQL is used as a format template that receives the node name.
	query := fmt.Sprintf(n.RecoveryQL, n.NodeName)
	result, err := h.InstantQuery(promAddr, query, n.ClusterName, h.cc.NodeRecovery.PromQueryTimeSecond)
	switch {
	case err != nil:
		klog.Errorf("recoveryNodes instantQuery err:%v ql:%v", err, query)
		return
	case len(result) != 2:
		// Any result size other than two is treated as "not recovered"; nothing is logged.
		return
	}
	klog.Infof("recoveryNodes check success.")

	// The outcome text defaults to success and is replaced only if UnCordon fails.
	outcome := "success"
	err = h.UnCordon(n.NodeName, n.ClusterName)
	if err != nil {
		outcome = fmt.Sprintf("failed:%v", err)
	}

	report := fmt.Sprintf("【自愈检查 %v: 恢复节点调度】【集群: %v】\n【节点: %v】【日期: %v】\n"+
		"【自愈检查 QL: %v】", outcome, n.ClusterName, n.NodeName, n.CreateTime, query)
	alert.DingTalkSend(h.cc.NodeRecovery.DingTalk, report)

	// The marker result is only logged; it does not influence anything afterwards.
	marked, err := n.RecoveryMarker()
	klog.Infof("RecoveryMarker result pass:%v err:%v", marked, err)
}
7 changes: 4 additions & 3 deletions deploy/horus/horus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ nodeRecovery:
checkIntervalSecond: 60
promQueryTimeSecond: 5
dingTalk:
  # Each key appears exactly once; a YAML mapping must not repeat a key.
  # NOTE(review): the access_token below is a credential stored in plain text
  # in a deployment manifest. Confirm whether it is a real token; if so, rotate
  # it and supply the webhook URL from a secret instead of this file.
  webhookUrl: "https://oapi.dingtalk.com/robot/send?access_token=aa2f3f74d7a2504653ca89b7a673707ba1d04b6d9d320c3572e5464d8f81471e"
  title: "【项目处理通知】"
  atMobiles:
    - 15000000
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ require (
github.com/fatih/color v1.16.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
github.com/gammazero/deque v0.2.0 // indirect
github.com/gammazero/workerpool v1.1.3 // indirect
github.com/gdamore/encoding v1.0.0 // indirect
github.com/gdamore/tcell/v2 v2.6.0 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,10 @@ github.com/fullstorydev/grpcurl v1.9.1 h1:YxX1aCcCc4SDBQfj9uoWcTLe8t4NWrZe1y+mk8
github.com/fullstorydev/grpcurl v1.9.1/go.mod h1:i8gKLIC6s93WdU3LSmkE5vtsCxyRmihUj5FK1cNW5EM=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/gammazero/deque v0.2.0 h1:SkieyNB4bg2/uZZLxvya0Pq6diUlwx7m2TeT7GAIWaA=
github.com/gammazero/deque v0.2.0/go.mod h1:LFroj8x4cMYCukHJDbxFCkT+r9AndaJnFMuZDV34tuU=
github.com/gammazero/workerpool v1.1.3 h1:WixN4xzukFoN0XSeXF6puqEqFTl2mECI9S6W44HWy9Q=
github.com/gammazero/workerpool v1.1.3/go.mod h1:wPjyBLDbyKnUn2XwwyD3EEwo9dHutia9/fwNmSHWACc=
github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko=
github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg=
github.com/gdamore/tcell/v2 v2.4.1-0.20210905002822-f057f0a857a1/go.mod h1:Az6Jt+M5idSED2YPGtwnfJV0kXohgdCBPmHGSYc1r04=
Expand Down

0 comments on commit 2ceef7a

Please sign in to comment.