Skip to content

Commit

Permalink
[horus] Self-recovering function features (#351)
Browse files — browse the repository at this point in the history
  • Loading branch information
mfordjody committed Sep 17, 2024
1 parent 953f0d1 commit 7122f53
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
7 changes: 4 additions & 3 deletions app/horus/core/horuser/modular.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ func (h *Horuser) CustomizeModular(ctx context.Context) {

func (h *Horuser) CustomizeModularOnCluster(clusterName, addr string) {
klog.Infof("CustomizeModularOnCluster Start clusterName:%v", clusterName)
for moduleName, ql := range h.cc.CustomModular.CheckQL {
for moduleName, checkql := range h.cc.CustomModular.CheckQL {
ql := checkql
vecs, err := h.InstantQuery(addr, ql, clusterName, h.cc.CustomModular.PromQueryTimeSecond)
if err != nil {
klog.Errorf("CustomizeModularOnCluster InstantQuery err:%v", err)
Expand Down Expand Up @@ -79,7 +80,7 @@ func (h *Horuser) CustomizeModularOnCluster(clusterName, addr string) {
func (h *Horuser) CustomizeModularNodes(clusterName, moduleName, nodeName, ip string) {
today := time.Now().Format("2006-01-02")

recoveryQL := h.cc.CustomModular.RecoveryQL[moduleName]
recoveryQL := fmt.Sprintf(h.cc.CustomModular.RecoveryQL[moduleName], nodeName)

data, err := db.GetDailyLimitNodeDataInfoDate(today, moduleName, clusterName)
if err != nil {
Expand All @@ -90,7 +91,7 @@ func (h *Horuser) CustomizeModularNodes(clusterName, moduleName, nodeName, ip st

dailyLimit := h.cc.CustomModular.CordonDailyLimit[moduleName]
if len(data) > dailyLimit {
msg := fmt.Sprintf("【日期:%v】 【集群:%v\n】 【模块今日 Cordon 节点数: %v】\n 【已达到今日上限: %v】\n [节点:%v]",
msg := fmt.Sprintf("【日期:%v】 【集群:%v\n】 【模块今日 Cordon 节点数: %v】\n 【已达到今日上限: %v】\n 节点:%v",
data, clusterName, moduleName, dailyLimit, nodeName)
alert.DingTalkSend(h.cc.CustomModular.DingTalk, msg)
return
Expand Down
1 change: 0 additions & 1 deletion app/horus/core/horuser/recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ func (h *Horuser) recoveryNodes(n *db.NodeDataInfo) {
klog.Infof("clusterName:%v nodeName:%v", n.ClusterName, n.NodeName)
return
}

vecs, err := h.InstantQuery(addr, n.RecoveryQL, n.ClusterName, h.cc.NodeRecovery.PromQueryTimeSecond)
if err != nil {
klog.Errorf("recoveryNodes InstantQuery err:%v ql:%v", err, n.RecoveryQL)
Expand Down
12 changes: 6 additions & 6 deletions deploy/horus/horus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,30 +42,30 @@ nodeRecovery:
enabled: true
dayNumber: 1
checkIntervalSecond: 5
promQueryTimeSecond: 10
promQueryTimeSecond: 60
dingTalk:
webhookUrl: "https://oapi.dingtalk.com/robot/send?access_token=37f8891e60e524013275cc01efafdb5976b81ef7269ce271b769bcd025826c12"
title: "horus 通知"
atMobiles:
- 15000000

customModular:
enabled: false
enabled: true
cordonDailyLimit:
filesystem_readonly: 5
arp_entries: 5
checkQL:
filesystem_readonly: |-
node_filesystem_readonly{mountpoint="/"} == 1
node_filesystem_readonly{mountpoint="/"} != 1
arp_entries: |-
node_arp_entries{device="ens160"} > 2
recoveryQL:
filesystem_readonly: |-
node_filesystem_readonly{mountpoint="/"} == 0
node_filesystem_readonly{mountpoint="/",node="%s"} == 0
arp_entries: |-
node_arp_entries{device="ens160"} > 2
node_arp_entries{device="ens160",node="%s"} > 2
checkIntervalSecond: 5
promQueryTimeSecond: 10
promQueryTimeSecond: 60
kubeMultiple:
cluster: config.1
dingTalk:
Expand Down

0 comments on commit 7122f53

Please sign in to comment.