Skip to content

Commit 354326a

Browse files
lichuan0620 authored and caicloud-bot committed
[release-1.2-starbucks]chore(*): cherrypick stability fix from master (#49)
* fix(*): monitor subprocess status (#43) * refactor subprocess start and stop * refactor watch /config/filebeat-output.yml * fix bug * fix bug (cherry picked from commit cae2680) * chore(*): add mount propagation (#45) (cherry picked from commit f0b561a)
1 parent 931a1de commit 354326a

File tree

4 files changed

+206
-41
lines changed

4 files changed

+206
-41
lines changed

cmd/filebeat-keeper/cmd.go

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package main
2+
3+
import (
4+
"os/exec"
5+
"syscall"
6+
"time"
7+
8+
"github.com/caicloud/nirvana/log"
9+
)
10+
11+
// waitingTime is how long, in seconds, Stop waits after sending SIGTERM
// before it kills the process. It is left untyped so that it can be
// multiplied by time.Second at the call site.
const waitingTime = 60
14+
15+
// AsyncCmd wraps an *exec.Cmd so that its termination can be observed
// without blocking on Wait.
type AsyncCmd struct {
	// cmd is the wrapped command.
	cmd *exec.Cmd
	// waitDone is closed by the goroutine launched in Start once cmd.Wait
	// has returned.
	waitDone chan struct{}
	// finished is set to true by the same goroutine and is what Exited
	// reports.
	finished bool
}

// WrapCmd returns an AsyncCmd around cmd. The command is not started; call
// Start for that.
func WrapCmd(cmd *exec.Cmd) *AsyncCmd {
	wrapped := new(AsyncCmd)
	wrapped.cmd = cmd
	wrapped.waitDone = make(chan struct{})
	// finished stays at its zero value (false) until the process exits.
	return wrapped
}
28+
29+
func (ac *AsyncCmd) Start() error {
30+
if err := ac.cmd.Start(); err != nil {
31+
return err
32+
}
33+
34+
go func(ac *AsyncCmd) {
35+
ac.cmd.Wait()
36+
close(ac.waitDone)
37+
ac.finished = true
38+
}(ac)
39+
40+
return nil
41+
}
42+
43+
func (ac *AsyncCmd) Stop() error {
44+
log.Infoln("Send TERM signal")
45+
if err := ac.cmd.Process.Signal(syscall.SIGTERM); err != nil {
46+
return err
47+
}
48+
49+
select {
50+
case <-ac.waitDone:
51+
return nil
52+
case <-time.After(waitingTime * time.Second):
53+
log.Infoln("Kill Process")
54+
if err := ac.cmd.Process.Kill(); err != nil {
55+
return err
56+
}
57+
}
58+
59+
<-ac.waitDone
60+
return nil
61+
}
62+
63+
func (ac *AsyncCmd) Exited() bool {
64+
return ac.finished
65+
}

cmd/filebeat-keeper/main.go

+95-41
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,124 @@
11
package main
22

33
import (
4+
"crypto/sha256"
45
"flag"
56
"fmt"
7+
"io"
68
"io/ioutil"
79
"os"
810
"os/exec"
911
"path/filepath"
12+
"strconv"
1013
"sync"
11-
"syscall"
1214
"text/template"
15+
"time"
1316

1417
"gopkg.in/yaml.v2"
1518

1619
"github.com/caicloud/logging-admin/pkg/util/graceful"
1720
"github.com/caicloud/logging-admin/pkg/util/osutil"
1821

1922
"github.com/caicloud/nirvana/log"
20-
"gopkg.in/fsnotify/fsnotify.v1"
23+
)
24+
25+
// HeatlthCheckInterval is the name of the environment variable that
// overrides the interval, in seconds, at which run checks whether filebeat
// has exited.
// NOTE(review): "HEATLTH" looks like a misspelling of "HEALTH"; the value is
// the variable name read at runtime, so it is kept as is.
const HeatlthCheckInterval = "HEATLTH_CHECK_INTERVAL"

// ConfigCheckInterval is the name of the environment variable that
// overrides the interval, in seconds, at which the source config file is
// re-hashed to detect changes.
const ConfigCheckInterval = "CONFIG_CHECK_INTERVAL"
2229

2330
var (
2431
filebeatExecutablePath = osutil.Getenv("FB_EXE_PATH", "filebeat")
2532
srcConfigPath = osutil.Getenv("SRC_CONFIG_PATH", "/config/filebeat-output.yml")
2633
dstConfigPath = osutil.Getenv("DST_CONFIG_PATH", "/etc/filebeat/filebeat.yml")
34+
heatlthCheckInterval = int64(10)
35+
configCheckInterval = int64(600)
2736
)
2837

29-
// When configmap being created for the first time, following events received:
30-
// INFO 1206-09:38:39.496+00 main.go:41 | Event: "/config/..2018_12_06_09_38_39.944532540": CREATE
31-
// INFO 1206-09:38:39.496+00 main.go:41 | Event: "/config/..2018_12_06_09_38_39.944532540": CHMOD
32-
// INFO 1206-09:38:39.497+00 main.go:41 | Event: "/config/filebeat-output.yml": CREATE
33-
// INFO 1206-09:38:39.497+00 main.go:41 | Event: "/config/..data_tmp": RENAME
34-
// INFO 1206-09:38:39.497+00 main.go:41 | Event: "/config/..data": CREATE
35-
// INFO 1206-09:38:39.497+00 main.go:41 | Event: "/config/..2018_12_06_09_37_32.878326343": REMOVE
36-
// When configmap being modified, following events received:
37-
// INFO 1206-09:42:56.488+00 main.go:41 | Event: "/config/..2018_12_06_09_42_56.160544363": CREATE
38-
// INFO 1206-09:42:56.488+00 main.go:41 | Event: "/config/..2018_12_06_09_42_56.160544363": CHMOD
39-
// INFO 1206-09:42:56.488+00 main.go:41 | Event: "/config/..data_tmp": RENAME
40-
// INFO 1206-09:42:56.488+00 main.go:41 | Event: "/config/..data": CREATE
41-
// INFO 1206-09:42:56.488+00 main.go:41 | Event: "/config/..2018_12_06_09_38_39.944532540": REMOVE
42-
func watchFileChange(path string, reloadCh chan<- struct{}) error {
43-
w, err := fsnotify.NewWatcher()
38+
func init() {
39+
sec, err := strconv.ParseInt(osutil.Getenv(HeatlthCheckInterval,
40+
strconv.FormatInt(heatlthCheckInterval, 10)), 10, 64)
41+
if err != nil || sec < 0 {
42+
log.Warningf("%s is Invalid, use default value %d", HeatlthCheckInterval, heatlthCheckInterval)
43+
} else {
44+
heatlthCheckInterval = sec
45+
}
46+
47+
sec, err = strconv.ParseInt(osutil.Getenv(ConfigCheckInterval,
48+
strconv.FormatInt(configCheckInterval, 10)), 10, 64)
49+
if err != nil || sec < 0 {
50+
log.Warningf("%s is Invalid, use default value %d", ConfigCheckInterval, configCheckInterval)
51+
} else {
52+
configCheckInterval = sec
53+
}
54+
}
55+
56+
// hashFile returns the SHA-256 digest of the file at path.
//
// The digest is returned as the raw 32 digest bytes converted to a string
// (not hex-encoded); callers that print it format it with %x. An error from
// opening or reading the file is returned unchanged together with an empty
// string.
func hashFile(path string) (string, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer file.Close()

	// Stream the contents through the hash rather than loading the file
	// into memory.
	digest := sha256.New()
	_, copyErr := io.Copy(digest, file)
	if copyErr != nil {
		return "", copyErr
	}

	sum := digest.Sum(nil)
	return string(sum), nil
}
70+
71+
func newFileChecker(path string, notify func()) func() {
72+
var (
73+
curHash string
74+
mtx sync.Mutex
75+
err error
76+
)
77+
78+
curHash, err = hashFile(path)
79+
if err != nil {
80+
log.Warningln(err)
81+
}
82+
83+
return func() {
84+
mtx.Lock()
85+
defer mtx.Unlock()
86+
87+
h, err := hashFile(path)
88+
if err != nil {
89+
log.Warningln(err)
90+
return
91+
}
92+
93+
if curHash != h {
94+
log.Infof("file need reload, old: %x, new: %x", curHash, h)
95+
curHash = h
96+
notify()
6397
}
6498
}
6599
}
66100

101+
func watchFileChange(path string, reloadCh chan<- struct{}) {
102+
checker := newFileChecker(path, func() { reloadCh <- struct{}{} })
103+
104+
//watch CM
105+
go watchConfigMapUpdate(filepath.Dir(path), checker)
106+
107+
//定时监测
108+
go func(checkFile func()) {
109+
check := time.Tick(time.Duration(configCheckInterval) * time.Second)
110+
for range check {
111+
checkFile()
112+
}
113+
}(checker)
114+
}
115+
67116
func run(stopCh <-chan struct{}) error {
68117
reloadCh := make(chan struct{}, 1)
69118
started := false
70119
cmd := newCmd()
71120

72-
go watchFileChange(filepath.Dir(srcConfigPath), reloadCh)
121+
watchFileChange(srcConfigPath, reloadCh)
73122

74123
if err := applyChange(); err == nil {
75124
reloadCh <- struct{}{}
@@ -78,11 +127,12 @@ func run(stopCh <-chan struct{}) error {
78127
log.Infoln("Filebeat will not start until configmap being updated")
79128
}
80129

130+
check := time.Tick(time.Duration(heatlthCheckInterval) * time.Second)
81131
for {
82132
select {
83133
case <-stopCh:
84134
log.Infoln("Wait filebeat shutdown")
85-
if err := cmd.Wait(); err != nil {
135+
if err := cmd.Stop(); err != nil {
86136
return fmt.Errorf("filebeat quit with error: %v", err)
87137
}
88138
return nil
@@ -100,11 +150,7 @@ func run(stopCh <-chan struct{}) error {
100150
log.Infoln("Filebeat start")
101151
started = true
102152
} else {
103-
log.Infoln("Send TERM signal")
104-
if err := cmd.Process.Signal(syscall.SIGTERM); err != nil {
105-
return fmt.Errorf("error send signal: %v", err)
106-
}
107-
if err := cmd.Wait(); err != nil {
153+
if err := cmd.Stop(); err != nil {
108154
return fmt.Errorf("filebeat quit with error: %v", err)
109155
}
110156
log.Infoln("Filebeat quit")
@@ -114,6 +160,13 @@ func run(stopCh <-chan struct{}) error {
114160
return fmt.Errorf("error run filebeat: %v", err)
115161
}
116162
}
163+
case <-check:
164+
if started {
165+
if cmd != nil && cmd.Exited() {
166+
log.Fatalln("Filebeat has unexpectedly exited")
167+
os.Exit(1)
168+
}
169+
}
117170
}
118171
}
119172
}
@@ -158,12 +211,13 @@ var (
158211
fbArgs []string
159212
)
160213

161-
func newCmd() *exec.Cmd {
214+
func newCmd() *AsyncCmd {
162215
log.Infof("Will run filebeat with command: %v %v", filebeatExecutablePath, fbArgs)
163216
cmd := exec.Command(filebeatExecutablePath, fbArgs...)
164217
cmd.Stderr = os.Stderr
165218
cmd.Stdout = os.Stdout
166-
return cmd
219+
220+
return WrapCmd(cmd)
167221
}
168222

169223
func main() {

cmd/filebeat-keeper/watch.go

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package main
2+
3+
import (
4+
"path/filepath"
5+
6+
"github.com/caicloud/nirvana/log"
7+
"gopkg.in/fsnotify/fsnotify.v1"
8+
)
9+
10+
const (
11+
dataDirName = "..data"
12+
)
13+
14+
// ref_link [https://github.com/jimmidyson/configmap-reload/issues/6#issuecomment-355203620]
15+
// ConfigMap volumes use an atomic writer. You could familarize yourself with
16+
// the mechanic how atomic writes are implemented. In the end you could check
17+
// if the actual change you do in your ConfigMap results in the rename of the
18+
// ..data-symlink (step 9).
19+
// ref_link [https://github.com/kubernetes/kubernetes/blob/6d98cdbbfb055757a9846dee97dafd4177d9a222/pkg/volume/util/atomic_writer.go#L56]
20+
func watchConfigMapUpdate(path string, update func()) error {
21+
w, err := fsnotify.NewWatcher()
22+
if err != nil {
23+
return err
24+
}
25+
if err := w.Add(path); err != nil {
26+
return err
27+
}
28+
29+
for {
30+
select {
31+
case ev := <-w.Events:
32+
log.Infoln("Event:", ev.String())
33+
if ev.Op&fsnotify.Create == fsnotify.Create {
34+
if filepath.Base(ev.Name) == dataDirName {
35+
log.Infoln("Configmap updated")
36+
update()
37+
}
38+
}
39+
case err := <-w.Errors:
40+
log.Errorf("Watch error: %v", err)
41+
}
42+
}
43+
}

release/logging-filebeat.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,15 @@ _config:
7878
- name: varlog
7979
path: /opt/filebeat
8080
subpath: filebeat
81+
propagation: HostToContainer
8182
- name: varlibdocker
8283
path: /var/lib/docker
8384
readonly: true
85+
propagation: HostToContainer
8486
- name: docker-sock
8587
path: /var/run/docker.sock
8688
readonly: true
89+
propagation: HostToContainer
8790
- image: '[[ registry_release ]]/beat-exporter:v0.1.2'
8891
imagePullPolicy: Always
8992
resources:

0 commit comments

Comments
 (0)