Skip to content

Commit d9ce265

Browse files
authored
feat(Flags): [Breaking] Add flag for snapshot duration frequency (#7675)
* Add flag for snapshot duration frequency * Use both entries and duration based flags for snapshots
1 parent 3d18b0f commit d9ce265

File tree

5 files changed

+58
-57
lines changed

5 files changed

+58
-57
lines changed

compose/compose.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ func getAlpha(idx int, raft string) service {
311311
}
312312

313313
if opts.SnapshotAfter != "" {
314-
raft = fmt.Sprintf("%s; snapshot-after=%s", raft, opts.SnapshotAfter)
314+
raft = fmt.Sprintf("%s; %s", raft, opts.SnapshotAfter)
315315
}
316316
svc.Command += fmt.Sprintf(` --raft "%s"`, raft)
317317

dgraph/cmd/alpha/run.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,13 @@ they form a Raft group and provide synchronous replication.
158158
Flag("learner",
159159
`Make this Alpha a "learner" node. In learner mode, this Alpha will not participate `+
160160
"in Raft elections. This can be used to achieve a read-only replica.").
161-
Flag("snapshot-after",
161+
Flag("snapshot-after-entries",
162162
"Create a new Raft snapshot after N number of Raft entries. The lower this number, "+
163-
"the more frequent snapshot creation will be.").
163+
"the more frequent snapshot creation will be. Snapshots are created only if both "+
164+
"snapshot-after-duration and snapshot-after-entries threshold are crossed.").
165+
Flag("snapshot-after-duration",
166+
"Frequency at which we should create a new raft snapshots. Set "+
167+
"to 0 to disable duration based snapshot.").
164168
Flag("pending-proposals",
165169
"Number of pending mutation proposals. Useful for rate limiting.").
166170
String())

worker/docker-compose.yml

+25-40
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Auto-generated with: [./compose -a 6 -z 3 -j -w --port_offset=0 --expose_ports=false -O ../worker/docker-compose.yml --mem= --snapshot_after=100 --names=false]
1+
# Auto-generated with: [./compose -a 6 -z 3 -j -w --port_offset=0 --expose_ports=false -O ../worker/docker-compose.yml --mem= --snapshot_after=snapshot-after-entries=100; snapshot-after-duration=1m --names=false]
22
#
33
version: "3.5"
44
services:
@@ -15,11 +15,9 @@ services:
1515
source: $GOPATH/bin
1616
target: /gobin
1717
read_only: true
18-
command: /gobin/dgraph alpha --my=alpha1:7080
19-
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2
20-
--security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
21-
--raft "snapshot-after=100;"
22-
--trace "jaeger=http://jaeger:14268;"
18+
command: /gobin/dgraph alpha --trace "jaeger=http://jaeger:14268;" --my=alpha1:7080
19+
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2 --raft "idx=1; group=1;
20+
snapshot-after-entries=100; snapshot-after-duration=1m" --security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
2321
alpha2:
2422
image: dgraph/dgraph:latest
2523
working_dir: /data/alpha2
@@ -33,11 +31,9 @@ services:
3331
source: $GOPATH/bin
3432
target: /gobin
3533
read_only: true
36-
command: /gobin/dgraph alpha --my=alpha2:7080
37-
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2
38-
--security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
39-
--raft "snapshot-after=100;"
40-
--trace "jaeger=http://jaeger:14268;"
34+
command: /gobin/dgraph alpha --trace "jaeger=http://jaeger:14268;" --my=alpha2:7080
35+
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2 --raft "idx=2; group=1;
36+
snapshot-after-entries=100; snapshot-after-duration=1m" --security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
4137
alpha3:
4238
image: dgraph/dgraph:latest
4339
working_dir: /data/alpha3
@@ -51,11 +47,9 @@ services:
5147
source: $GOPATH/bin
5248
target: /gobin
5349
read_only: true
54-
command: /gobin/dgraph alpha --my=alpha3:7080
55-
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2
56-
--security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
57-
--raft "snapshot-after=100;"
58-
--trace "jaeger=http://jaeger:14268;"
50+
command: /gobin/dgraph alpha --trace "jaeger=http://jaeger:14268;" --my=alpha3:7080
51+
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2 --raft "idx=3; group=1;
52+
snapshot-after-entries=100; snapshot-after-duration=1m" --security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
5953
alpha4:
6054
image: dgraph/dgraph:latest
6155
working_dir: /data/alpha4
@@ -69,11 +63,9 @@ services:
6963
source: $GOPATH/bin
7064
target: /gobin
7165
read_only: true
72-
command: /gobin/dgraph alpha --my=alpha4:7080
73-
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2
74-
--security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
75-
--raft "snapshot-after=100;"
76-
--trace "jaeger=http://jaeger:14268;"
66+
command: /gobin/dgraph alpha --trace "jaeger=http://jaeger:14268;" --my=alpha4:7080
67+
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2 --raft "idx=4; group=2;
68+
snapshot-after-entries=100; snapshot-after-duration=1m" --security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
7769
alpha5:
7870
image: dgraph/dgraph:latest
7971
working_dir: /data/alpha5
@@ -87,11 +79,9 @@ services:
8779
source: $GOPATH/bin
8880
target: /gobin
8981
read_only: true
90-
command: /gobin/dgraph alpha --my=alpha5:7080
91-
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2
92-
--security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
93-
--raft "snapshot-after=100;"
94-
--trace "jaeger=http://jaeger:14268;"
82+
command: /gobin/dgraph alpha --trace "jaeger=http://jaeger:14268;" --my=alpha5:7080
83+
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2 --raft "idx=5; group=2;
84+
snapshot-after-entries=100; snapshot-after-duration=1m" --security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
9585
alpha6:
9686
image: dgraph/dgraph:latest
9787
working_dir: /data/alpha6
@@ -105,11 +95,9 @@ services:
10595
source: $GOPATH/bin
10696
target: /gobin
10797
read_only: true
108-
command: /gobin/dgraph alpha --my=alpha6:7080
109-
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2
110-
--security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
111-
--raft "snapshot-after=100;"
112-
--trace "jaeger=http://jaeger:14268;"
98+
command: /gobin/dgraph alpha --trace "jaeger=http://jaeger:14268;" --my=alpha6:7080
99+
--zero=zero1:5080,zero2:5080,zero3:5080 --logtostderr -v=2 --raft "idx=6; group=2;
100+
snapshot-after-entries=100; snapshot-after-duration=1m" --security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;"
113101
jaeger:
114102
image: jaegertracing/all-in-one:1.18
115103
working_dir: /working/jaeger
@@ -133,9 +121,8 @@ services:
133121
source: $GOPATH/bin
134122
target: /gobin
135123
read_only: true
136-
command: /gobin/dgraph zero --raft "idx=1;" --my=zero1:5080
137-
--replicas=3 --logtostderr -v=2 --bindall
138-
--trace "jaeger=http://jaeger:14268;"
124+
command: /gobin/dgraph zero --trace "jaeger=http://jaeger:14268;" --raft='idx=1'
125+
--my=zero1:5080 --replicas=3 --logtostderr -v=2 --bindall
139126
zero2:
140127
image: dgraph/dgraph:latest
141128
working_dir: /data/zero2
@@ -151,9 +138,8 @@ services:
151138
source: $GOPATH/bin
152139
target: /gobin
153140
read_only: true
154-
command: /gobin/dgraph zero --raft "idx=2;" --my=zero2:5080
155-
--replicas=3 --logtostderr -v=2 --peer=zero1:5080
156-
--trace "jaeger=http://jaeger:14268;"
141+
command: /gobin/dgraph zero --trace "jaeger=http://jaeger:14268;" --raft='idx=2'
142+
--my=zero2:5080 --replicas=3 --logtostderr -v=2 --peer=zero1:5080
157143
zero3:
158144
image: dgraph/dgraph:latest
159145
working_dir: /data/zero3
@@ -169,7 +155,6 @@ services:
169155
source: $GOPATH/bin
170156
target: /gobin
171157
read_only: true
172-
command: /gobin/dgraph zero --raft "idx=3;" --my=zero3:5080
173-
--replicas=3 --logtostderr -v=2 --peer=zero1:5080
174-
--trace "jaeger=http://jaeger:14268;"
158+
command: /gobin/dgraph zero --trace "jaeger=http://jaeger:14268;" --raft='idx=3'
159+
--my=zero3:5080 --replicas=3 --logtostderr -v=2 --peer=zero1:5080
175160
volumes: {}

worker/draft.go

+21-10
Original file line numberDiff line numberDiff line change
@@ -997,10 +997,13 @@ func (n *node) updateRaftProgress() error {
997997

998998
func (n *node) checkpointAndClose(done chan struct{}) {
999999
slowTicker := time.NewTicker(time.Minute)
1000+
lastSnapshotTime := time.Now()
10001001
defer slowTicker.Stop()
10011002

1002-
snapshotAfter := x.WorkerConfig.Raft.GetUint64("snapshot-after")
1003-
x.AssertTruef(snapshotAfter > 10, "raft.snapshot-after must be a number greater than 10")
1003+
snapshotAfterEntries := x.WorkerConfig.Raft.GetUint64("snapshot-after-entries")
1004+
x.AssertTruef(snapshotAfterEntries > 10, "raft.snapshot-after must be a number greater than 10")
1005+
1006+
snapshotFrequency := x.WorkerConfig.Raft.GetDuration("snapshot-after-duration")
10041007

10051008
for {
10061009
select {
@@ -1028,16 +1031,22 @@ func (n *node) checkpointAndClose(done chan struct{}) {
10281031
// calculate a new snapshot.
10291032
calculate := raft.IsEmptySnap(snap) || n.Store.NumLogFiles() > 4
10301033

1031-
if chk, err := n.Store.Checkpoint(); err == nil {
1032-
if first, err := n.Store.FirstIndex(); err == nil {
1033-
// Save some cycles by only calculating snapshot if the checkpoint has gone
1034-
// quite a bit further than the first index.
1035-
calculate = calculate || chk >= first+snapshotAfter
1036-
glog.V(3).Infof("Evaluating snapshot first:%d chk:%d (chk-first:%d) "+
1037-
"snapshotAfter:%d snap:%v", first, chk, chk-first,
1038-
snapshotAfter, calculate)
1034+
// Only take snapshot if both snapshotFrequency and
1035+
// snapshotAfterEntries requirements are met. If set to 0,
1036+
// we consider duration condition to be disabled.
1037+
if snapshotFrequency == 0 || time.Since(lastSnapshotTime) > snapshotFrequency {
1038+
if chk, err := n.Store.Checkpoint(); err == nil {
1039+
if first, err := n.Store.FirstIndex(); err == nil {
1040+
// Save some cycles by only calculating snapshot if the checkpoint
1041+
// has gone quite a bit further than the first index.
1042+
calculate = calculate || chk >= first+snapshotAfterEntries
1043+
glog.V(3).Infof("Evaluating snapshot first:%d chk:%d (chk-first:%d) "+
1044+
"snapshotAfterEntries:%d snap:%v", first, chk, chk-first,
1045+
snapshotAfterEntries, calculate)
1046+
}
10391047
}
10401048
}
1049+
10411050
// We keep track of the applied index in the p directory. Even if we don't take
10421051
// snapshot for a while and let the Raft logs grow and restart, we would not have to
10431052
// run all the log entries, because we can tell Raft.Config to set Applied to that
@@ -1055,6 +1064,8 @@ func (n *node) checkpointAndClose(done chan struct{}) {
10551064
// or our checkpoint already crossed the SnapshotAfter threshold.
10561065
if err := n.proposeSnapshot(); err != nil {
10571066
glog.Errorf("While calculating and proposing snapshot: %v", err)
1067+
} else {
1068+
lastSnapshotTime = time.Now()
10581069
}
10591070
}
10601071
go n.abortOldTransactions()

worker/server_state.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,11 @@ const (
3838
// For easy readability, keep the options without default values (if any) at the end of
3939
// the *Defaults string. Also, since these strings are printed in --help text, avoid line
4040
// breaks.
41-
AclDefaults = `access-ttl=6h; refresh-ttl=30d; secret-file=;`
42-
AuditDefaults = `compress=false; days=10; size=100; dir=; output=; encrypt-file=;`
43-
BadgerDefaults = `compression=snappy; goroutines=8; max-retries=-1;`
44-
RaftDefaults = `learner=false; snapshot-after=10000; pending-proposals=256; idx=; group=;`
41+
AclDefaults = `access-ttl=6h; refresh-ttl=30d; secret-file=;`
42+
AuditDefaults = `compress=false; days=10; size=100; dir=; output=; encrypt-file=;`
43+
BadgerDefaults = `compression=snappy; goroutines=8; max-retries=-1;`
44+
RaftDefaults = `learner=false; snapshot-after-entries=10000; ` +
45+
`snapshot-after-duration=30m; pending-proposals=256; idx=; group=;`
4546
SecurityDefaults = `token=; whitelist=;`
4647
LudicrousDefaults = `enabled=false; concurrency=2000;`
4748
CDCDefaults = `file=; kafka=; sasl_user=; sasl_password=; ca_cert=; client_cert=; ` +

0 commit comments

Comments
 (0)