-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathchecks.go
64 lines (52 loc) · 1.4 KB
/
checks.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
package main
import (
"errors"
"time"
)
// Sentinel errors returned by HealthChecker.CheckReplicationSlot.
var (
// ErrReplicationSlotNotFound is returned when no replication stats entry
// matches the requested name.
ErrReplicationSlotNotFound = errors.New("replication slot not found")
// ErrReplicationSlotLagTooHigh is returned when the matching entry reports a
// lag from upstream greater than one second.
ErrReplicationSlotLagTooHigh = errors.New("replication lag is too high")
)
// HealthChecker runs replication health checks using the data it reads from a
// ReplicationDataSource.
type HealthChecker struct {
// dataSource is queried via IsInRecovery and GetPgStatReplication by the
// checker's methods.
dataSource ReplicationDataSource
}
// NewHealthChecker returns a HealthChecker that reads its replication data
// from dataSource.
func NewHealthChecker(dataSource ReplicationDataSource) *HealthChecker {
	checker := new(HealthChecker)
	checker.dataSource = dataSource
	return checker
}
// isInRecovery forwards to the data source's IsInRecovery and returns its
// result and error unchanged.
func (hc *HealthChecker) isInRecovery() (bool, error) {
	inRecovery, err := hc.dataSource.IsInRecovery()
	return inRecovery, err
}
// getStatReplicationByName returns the first entry reported by the data
// source's GetPgStatReplication whose ApplicationName equals slotName.
//
// It returns (nil, nil) when no entry matches, and (nil, err) when the data
// source reports an error.
func (hc *HealthChecker) getStatReplicationByName(slotName string) (*PgStatReplication, error) {
	entries, err := hc.dataSource.GetPgStatReplication()
	if err != nil {
		return nil, err
	}

	for _, entry := range entries {
		if entry.ApplicationName != slotName {
			continue
		}
		return entry, nil
	}

	// No match is reported as a nil entry rather than an error; callers
	// must check for nil.
	return nil, nil
}
// CheckReplicationSlot reports whether the replica identified by slotName is
// healthy, returning nil when it is.
//
// A healthy database replica is:
//  1. Online and accepting connections.
//  2. Actively replicating from the upstream DB.
//  3. Lagging the upstream by at most 1 second.
//
// It returns ErrReplicationSlotNotFound when no replication stats entry
// matches slotName, ErrReplicationSlotLagTooHigh when the entry's lag from
// upstream exceeds one second, and any error from the stats lookup unchanged.
func (hc *HealthChecker) CheckReplicationSlot(slotName string) error {
	replica, err := hc.getStatReplicationByName(slotName)

	// Cases are evaluated top to bottom, so the lag is only read once the
	// lookup has succeeded and produced a non-nil entry.
	switch {
	case err != nil:
		return err
	case replica == nil:
		// NOTE: It would be nice to differentiate between not found and
		// inactive. But, is it worth an extra query?
		return ErrReplicationSlotNotFound
	case replica.LagFromUpstream() > time.Second:
		return ErrReplicationSlotLagTooHigh
	default:
		// The DB is healthy.
		return nil
	}
}