From 9f339fe67ed9c15e1c842916c4ce07b5841e6213 Mon Sep 17 00:00:00 2001 From: mfordjody <11638005@qq.com> Date: Tue, 17 Sep 2024 18:37:20 +0800 Subject: [PATCH] [horus] Self-recovering function features --- app/horus/core/horuser/modular.go | 7 ++++--- app/horus/core/horuser/recovery.go | 1 - deploy/horus/horus.yaml | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/app/horus/core/horuser/modular.go b/app/horus/core/horuser/modular.go index bd3445d46..1a77a5521 100644 --- a/app/horus/core/horuser/modular.go +++ b/app/horus/core/horuser/modular.go @@ -50,7 +50,8 @@ func (h *Horuser) CustomizeModular(ctx context.Context) { func (h *Horuser) CustomizeModularOnCluster(clusterName, addr string) { klog.Infof("CustomizeModularOnCluster Start clusterName:%v", clusterName) - for moduleName, ql := range h.cc.CustomModular.CheckQL { + for moduleName, checkql := range h.cc.CustomModular.CheckQL { + ql := checkql vecs, err := h.InstantQuery(addr, ql, clusterName, h.cc.CustomModular.PromQueryTimeSecond) if err != nil { klog.Errorf("CustomizeModularOnCluster InstantQuery err:%v", err) @@ -79,7 +80,7 @@ func (h *Horuser) CustomizeModularOnCluster(clusterName, addr string) { func (h *Horuser) CustomizeModularNodes(clusterName, moduleName, nodeName, ip string) { today := time.Now().Format("2006-01-02") - recoveryQL := h.cc.CustomModular.RecoveryQL[moduleName] + recoveryQL := fmt.Sprintf(h.cc.CustomModular.RecoveryQL[moduleName], nodeName) data, err := db.GetDailyLimitNodeDataInfoDate(today, moduleName, clusterName) if err != nil { @@ -90,7 +91,7 @@ func (h *Horuser) CustomizeModularNodes(clusterName, moduleName, nodeName, ip st dailyLimit := h.cc.CustomModular.CordonDailyLimit[moduleName] if len(data) > dailyLimit { - msg := fmt.Sprintf("【日期:%v】 【集群:%v\n】 【模块今日 Cordon 节点数: %v】\n 【已达到今日上限: %v】\n [节点:%v]", + msg := fmt.Sprintf("【日期:%v】 【集群:%v\n】 【模块今日 Cordon 节点数: %v】\n 【已达到今日上限: %v】\n 【节点:%v】", data, clusterName, moduleName, dailyLimit, nodeName) alert.DingTalkSend(h.cc.CustomModular.DingTalk, msg) return diff --git a/app/horus/core/horuser/recovery.go b/app/horus/core/horuser/recovery.go index 5f7fd47d8..515cf2d81 100644 --- a/app/horus/core/horuser/recovery.go +++ b/app/horus/core/horuser/recovery.go @@ -60,7 +60,6 @@ func (h *Horuser) recoveryNodes(n *db.NodeDataInfo) { klog.Infof("clusterName:%v nodeName:%v", n.ClusterName, n.NodeName) return } - vecs, err := h.InstantQuery(addr, n.RecoveryQL, n.ClusterName, h.cc.NodeRecovery.PromQueryTimeSecond) if err != nil { klog.Errorf("recoveryNodes InstantQuery err:%v ql:%v", err, n.RecoveryQL) diff --git a/deploy/horus/horus.yaml b/deploy/horus/horus.yaml index dd042b015..6884df579 100644 --- a/deploy/horus/horus.yaml +++ b/deploy/horus/horus.yaml @@ -42,7 +42,7 @@ nodeRecovery: enabled: true dayNumber: 1 checkIntervalSecond: 5 - promQueryTimeSecond: 10 + promQueryTimeSecond: 60 dingTalk: webhookUrl: "https://oapi.dingtalk.com/robot/send?access_token=37f8891e60e524013275cc01efafdb5976b81ef7269ce271b769bcd025826c12" title: "horus 通知" @@ -50,22 +50,22 @@ nodeRecovery: - 15000000 customModular: - enabled: false + enabled: true cordonDailyLimit: filesystem_readonly: 5 arp_entries: 5 checkQL: filesystem_readonly: |- - node_filesystem_readonly{mountpoint="/"} == 1 + node_filesystem_readonly{mountpoint="/"} != 1 arp_entries: |- node_arp_entries{device="ens160"} > 2 recoveryQL: filesystem_readonly: |- - node_filesystem_readonly{mountpoint="/"} == 0 + node_filesystem_readonly{mountpoint="/",node="%s"} == 0 arp_entries: |- - node_arp_entries{device="ens160"} > 2 + node_arp_entries{device="ens160",node="%s"} > 2 checkIntervalSecond: 5 - promQueryTimeSecond: 10 + promQueryTimeSecond: 60 kubeMultiple: cluster: config.1 dingTalk: