Skip to content

Commit

Permalink
Add monitor info (#5267)
Browse files Browse the repository at this point in the history
* add database feishu info and cockroach monitor
  • Loading branch information
wallyxjh authored Dec 6, 2024
1 parent df66830 commit 05399c1
Show file tree
Hide file tree
Showing 15 changed files with 512 additions and 233 deletions.
64 changes: 55 additions & 9 deletions service/exceptionmonitor/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,71 @@ type QueryResult struct {
} `json:"data"`
}

// Info is the notification record handed to the notification helpers (it is
// passed by pointer to notification.SendFeishuNotification). All scalar values
// are carried as strings.
//
// TODO: decide whether the status should be split into several states
// (lastStatus, recoveryStatus, lastStatusTime, recoveryStatusTime,
// lastStatusInfo, recoveryStatusInfo) and check whether any state is wrong.
type Info struct {
	// Database cluster the notification refers to.
	DatabaseClusterName, Namespace, DebtLevel, DatabaseType string

	// Event details and the kind of notification being sent.
	Events, Reason, NotificationType string

	// Usage readings, stored as strings.
	DiskUsage, CPUUsage, MemUsage string

	// Classification of the monitored item and of the exception.
	PerformanceType, ExceptionType string

	// Exception and recovery state, with their timestamps as strings.
	ExceptionStatus, RecoveryStatus, ExceptionStatusTime, RecoveryTime string

	// Cluster UID and the Feishu webhook the message is delivered to.
	DatabaseClusterUID, FeishuWebHook string

	// NOTE(review): the original note here reads just "struct" — presumably a
	// reminder to replace these untyped maps with a dedicated struct; confirm.
	FeishuInfo []map[string]interface{}
}

// NameSpaceQuota holds, for one namespace, a limit value and a usage value
// for each of seven resource kinds. Every value is stored as a string.
type NameSpaceQuota struct {
	NameSpace string

	// Limit values, one per resource kind.
	CPULimit, MemLimit, GPULimit, EphemeralStorageLimit, ObjectStorageLimit, NodePortLimit, StorageLimit string

	// Usage values, listed in the same resource order as the limits.
	CPUUsage, MemUsage, GPUUsage, EphemeralStorageUsage, ObjectStorageUsage, NodePortUsage, StorageUsage string
}

// Status string constants plus the MonitorTypeALL selector value.
// NOTE(review): presumably the Status* values are compared against the status
// reported for a database cluster — confirm against the callers.
const (
StatusDeleting = "Deleting"
StatusCreating = "Creating"
StatusStopping = "Stopping"
StatusStopped = "Stopped"
StatusRunning = "Running"
StatusUpdating = "Updating"
//StatusUpdating = "Updating"
// StatusUnknown is the empty string.
StatusUnknown = ""
// MonitorTypeALL is the literal "all".
MonitorTypeALL = "all"
)

var (
ClientSet *kubernetes.Clientset
DynamicClient *dynamic.DynamicClient
// records the last database status
LastDatabaseClusterStatus = make(map[string]string)
// record the debt ns
ExceptionDatabaseMap = make(map[string]bool)
FeishuWebHookMap = make(map[string]string)
ClientSet *kubernetes.Clientset
DynamicClient *dynamic.DynamicClient
DebtNamespaceMap = make(map[string]bool)
DiskFullNamespaceMap = make(map[string]bool)
DiskMonitorNamespaceMap = make(map[string]bool)
CPUMonitorNamespaceMap = make(map[string]bool)
MemMonitorNamespaceMap = make(map[string]bool)
LastBackupStatusMap = make(map[string]string)
IsSendBackupStatusMap = make(map[string]string)
DatabaseNamespaceMap = make(map[string]string)
DatabaseNotificationInfoMap = make(map[string]*Info)
ExceededQuotaException = "exceeded quota"
DiskException = "Writing to log file failed"
OwnerLabel = "user.sealos.io/owner"
Expand All @@ -65,13 +103,16 @@ var (
CPUMemMonitor bool
BackupMonitor bool
QuotaMonitor bool
CockroachMonitor bool
DatabaseDiskMonitorThreshold float64
DatabaseExceptionMonitorThreshold float64
DatabaseCPUMonitorThreshold float64
DatabaseMemMonitorThreshold float64
QuotaThreshold float64
APPID string
APPSECRET string
GlobalCockroachURI string
LocalCockroachURI string
DatabaseStatusMessageIDMap = make(map[string]string)
DatabaseDiskMessageIDMap = make(map[string]string)
DatabaseCPUMessageIDMap = make(map[string]string)
Expand All @@ -90,11 +131,14 @@ func GetENV() error {
MonitorType = getEnvWithCheck("MonitorType", &missingEnvVars)
clusterNS := getEnvWithCheck("ClusterNS", &missingEnvVars)
LOCALREGION = getEnvWithCheck("LOCALREGION", &missingEnvVars)
GlobalCockroachURI = getEnvWithCheck("GlobalCockroachURI", &missingEnvVars)
LocalCockroachURI = getEnvWithCheck("LocalCockroachURI", &missingEnvVars)
DatabaseMonitor, _ = strconv.ParseBool(getEnvWithCheck("DatabaseMonitor", &missingEnvVars))
DiskMonitor, _ = strconv.ParseBool(getEnvWithCheck("DiskMonitor", &missingEnvVars))
CPUMemMonitor, _ = strconv.ParseBool(getEnvWithCheck("CPUMemMonitor", &missingEnvVars))
BackupMonitor, _ = strconv.ParseBool(getEnvWithCheck("BackupMonitor", &missingEnvVars))
QuotaMonitor, _ = strconv.ParseBool(getEnvWithCheck("QuotaMonitor", &missingEnvVars))
CockroachMonitor, _ = strconv.ParseBool(getEnvWithCheck("CockroachMonitor", &missingEnvVars))
DatabaseDiskMonitorThreshold, _ = strconv.ParseFloat(getEnvWithCheck("DatabaseDiskMonitorThreshold", &missingEnvVars), 64)
DatabaseExceptionMonitorThreshold, _ = strconv.ParseFloat(getEnvWithCheck("DatabaseExceptionMonitorThreshold", &missingEnvVars), 64)
DatabaseCPUMonitorThreshold, _ = strconv.ParseFloat(getEnvWithCheck("DatabaseCPUMonitorThreshold", &missingEnvVars), 64)
Expand All @@ -119,6 +163,8 @@ func GetENV() error {
"FeishuWebhookURLBackup",
//Quota
"FeishuWebhookURLQuota",
//CockroachDB
"FeishuWebhookURLCockroachDB",
}, FeishuWebhookURLMap, &missingEnvVars)

// Get ClusterRegionMap
Expand Down
3 changes: 1 addition & 2 deletions service/exceptionmonitor/dao/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ package dao
import (
"os"

"github.com/labring/sealos/service/exceptionmonitor/api"

"github.com/labring/sealos/controllers/pkg/database/cockroach"
"github.com/labring/sealos/service/exceptionmonitor/api"
)

var (
Expand Down
7 changes: 4 additions & 3 deletions service/exceptionmonitor/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ require (
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
Expand All @@ -41,6 +41,7 @@ require (
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/larksuite/oapi-sdk-go/v3 v3.2.9 // indirect
github.com/lib/pq v1.10.9 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matoous/go-nanoid/v2 v2.0.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
Expand All @@ -51,7 +52,7 @@ require (
go.mongodb.org/mongo-driver v1.12.1 // indirect
golang.org/x/crypto v0.21.0 // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/oauth2 v0.10.0 // indirect
golang.org/x/oauth2 v0.12.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
Expand All @@ -70,7 +71,7 @@ require (
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)

replace (
Expand Down
2 changes: 2 additions & 0 deletions service/exceptionmonitor/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ github.com/larksuite/oapi-sdk-go v1.1.48 h1:RHRr5LW68AibBzXVRXObUpkbS6TXapl4TAyh
github.com/larksuite/oapi-sdk-go v1.1.48/go.mod h1:7ybKAbVdKBjXuX0YrMTfnWUyCaIe/zeI1wqjNfN9XOk=
github.com/larksuite/oapi-sdk-go/v3 v3.2.9 h1:9zQAGrzhibNwdaGRkWUP1cAd2k2dJJDpbSffcfK0wPw=
github.com/larksuite/oapi-sdk-go/v3 v3.2.9/go.mod h1:ZEplY+kwuIrj/nqw5uSCINNATcH3KdxSN7y+UxYY5fI=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/matoous/go-nanoid v1.5.0/go.mod h1:zyD2a71IubI24efhpvkJz+ZwfwagzgSO6UNiFsZKN7U=
Expand Down
54 changes: 54 additions & 0 deletions service/exceptionmonitor/helper/monitor/cockroachdb_monitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package monitor

import (
"fmt"
"log"
"time"

"github.com/labring/sealos/service/exceptionmonitor/api"
"github.com/labring/sealos/service/exceptionmonitor/helper/notification"
"gorm.io/driver/postgres"
"gorm.io/gorm"
"gorm.io/gorm/logger"
)

// CockroachMonitor runs the CockroachDB health checks in a loop for as long as
// api.CockroachMonitor is true. Each round checks api.GlobalCockroachURI and
// then api.LocalCockroachURI, using the "FeishuWebhookURLCockroachDB" entry of
// api.FeishuWebhookURLMap as the notification webhook, and then sleeps for
// five minutes. The flag is re-read before every round, so the function
// returns once it is observed to be false.
func CockroachMonitor() {
	const pollInterval = 5 * time.Minute

	for {
		if !api.CockroachMonitor {
			return
		}

		// A fresh Info is built every round; only the webhook is populated.
		info := &api.Info{
			FeishuWebHook: api.FeishuWebhookURLMap["FeishuWebhookURLCockroachDB"],
		}
		monitorCockroachDB(api.GlobalCockroachURI, "Global", info)
		monitorCockroachDB(api.LocalCockroachURI, "Local", info)

		time.Sleep(pollInterval)
	}
}

// monitorCockroachDB checks the CockroachDB instance at uri and, if the check
// fails, sends a Feishu notification built from the error text and label.
// A failure to send the notification is logged and otherwise ignored; nothing
// is returned to the caller.
func monitorCockroachDB(uri, label string, notificationInfo *api.Info) {
	checkErr := checkCockroachDB(uri)
	if checkErr == nil {
		// Healthy: nothing to report.
		return
	}

	message := notification.GetCockroachMessage(checkErr.Error(), label)
	sendErr := notification.SendFeishuNotification(notificationInfo, message)
	if sendErr != nil {
		log.Printf("Failed to send Feishu notification for %s: %v", label, sendErr)
	}
}

// checkCockroachDB opens a connection to the database described by
// cockroachConnection (through the gorm postgres driver, with gorm logging
// discarded), pings it, and closes the connection pool again.
//
// It returns nil when the ping succeeds. Otherwise it returns an error that
// names the failing step and wraps the underlying cause, so callers can
// inspect it with errors.Is / errors.As.
func checkCockroachDB(cockroachConnection string) error {
	db, err := gorm.Open(postgres.Open(cockroachConnection), &gorm.Config{
		Logger: logger.Discard,
	})
	if err != nil {
		// gorm.Open can fail after the underlying pool has been created
		// (e.g. when its initial ping fails). This function is called
		// periodically, so release the pool instead of leaking one per
		// failed probe.
		if db != nil {
			if sqlDB, dbErr := db.DB(); dbErr == nil && sqlDB != nil {
				// Best-effort cleanup; the connect error is what matters.
				_ = sqlDB.Close()
			}
		}
		return fmt.Errorf("failed to connect to CockroachDB: %w", err)
	}

	sqlDB, err := db.DB()
	if err != nil {
		return fmt.Errorf("failed to get database instance: %w", err)
	}
	defer sqlDB.Close()

	if err := sqlDB.Ping(); err != nil {
		return fmt.Errorf("failed to ping CockroachDB: %w", err)
	}
	return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,18 @@ func processBackup(backup unstructured.Unstructured) {
}

func SendBackupNotification(backupName, namespace, status, startTimestamp string) {
notificationInfo := notification.Info{
notificationInfo := api.Info{
DatabaseClusterName: backupName,
Namespace: namespace,
Status: status,
ExceptionStatus: status,
ExceptionType: "备份",
PerformanceType: "Backup",
NotificationType: "exception",
NotificationType: notification.ExceptionType,
FeishuWebHook: api.FeishuWebhookURLMap["FeishuWebhookURLBackup"],
}
if _, ok := api.LastBackupStatusMap[backupName]; !ok {
message := notification.GetBackupMessage("exception", namespace, backupName, status, startTimestamp, "")
if err := notification.SendFeishuNotification(notificationInfo, message, api.FeishuWebhookURLMap["FeishuWebhookURLBackup"]); err != nil {
message := notification.GetBackupMessage(notification.ExceptionType, namespace, backupName, status, startTimestamp, "")
if err := notification.SendFeishuNotification(&notificationInfo, message); err != nil {
log.Printf("Error sending exception notification:%v", err)
}
api.LastBackupStatusMap[backupName] = status
Expand Down
Loading

0 comments on commit 05399c1

Please sign in to comment.