diff --git a/worker/draft.go b/worker/draft.go
index b200b3fa4b3..06bba941dba 100644
--- a/worker/draft.go
+++ b/worker/draft.go
@@ -933,12 +933,22 @@ func (n *node) checkpointAndClose(done chan struct{}) {
             }
 
             if n.AmLeader() {
-                var calculate bool
+                // If leader doesn't have a snapshot, we should create one immediately. This is very
+                // useful when you bring up the cluster from bulk loader. If you remove an alpha and
+                // add a new alpha, the new follower won't get a snapshot if the leader doesn't have
+                // one.
+                snap, err := n.Store.Snapshot()
+                if err != nil {
+                    glog.Errorf("While retrieving snapshot from Store: %v\n", err)
+                    continue
+                }
+                calculate := raft.IsEmptySnap(snap) // If no snapshot, then calculate one immediately.
+
                 if chk, err := n.Store.Checkpoint(); err == nil {
                     if first, err := n.Store.FirstIndex(); err == nil {
                         // Save some cycles by only calculating snapshot if the checkpoint has gone
                         // quite a bit further than the first index.
-                        calculate = chk >= first+uint64(x.WorkerConfig.SnapshotAfter)
+                        calculate = calculate || chk >= first+uint64(x.WorkerConfig.SnapshotAfter)
                         glog.V(3).Infof("Evaluating snapshot first:%d chk:%d (chk-first:%d) "+
                             "snapshotAfter:%d snap:%v", first, chk, chk-first,
                             x.WorkerConfig.SnapshotAfter, calculate)
@@ -956,7 +966,10 @@ func (n *node) checkpointAndClose(done chan struct{}) {
                 // snapshotting. We just need to do enough, so that we don't have a huge backlog of
                 // entries to process on a restart.
                 if calculate {
-                    if err := n.proposeSnapshot(x.WorkerConfig.SnapshotAfter); err != nil {
+                    // We can set discardN argument to zero, because we already know that calculate
+                    // would be true if either we absolutely needed to calculate the snapshot,
+                    // or our checkpoint already crossed the SnapshotAfter threshold.
+                    if err := n.proposeSnapshot(0); err != nil {
                         glog.Errorf("While calculating and proposing snapshot: %v", err)
                     }
                 }
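
For reviewers, the gating condition above can be read in isolation as follows. This is a minimal sketch, not code from the patch: store and shouldCalculateSnapshot are hypothetical names standing in for the subset of n.Store the patch touches, and only raft.IsEmptySnap is the real etcd raft helper (the import path may differ from the copy vendored by this repository).

    // Package snapshotpolicy is an illustrative, standalone rendering of the
    // snapshot-decision logic introduced by this patch.
    package snapshotpolicy

    import (
        "go.etcd.io/etcd/raft"        // assumption: etcd raft package providing IsEmptySnap
        pb "go.etcd.io/etcd/raft/raftpb"
    )

    // store mirrors the subset of n.Store used by the patch (hypothetical interface).
    type store interface {
        Snapshot() (pb.Snapshot, error)
        Checkpoint() (uint64, error)
        FirstIndex() (uint64, error)
    }

    // shouldCalculateSnapshot returns true when the leader has no snapshot at all,
    // or when the checkpoint has moved at least snapshotAfter entries past the
    // first index -- the same condition the patch feeds into proposeSnapshot.
    func shouldCalculateSnapshot(s store, snapshotAfter uint64) (bool, error) {
        snap, err := s.Snapshot()
        if err != nil {
            return false, err // the patch logs the error and continues the loop here
        }
        calculate := raft.IsEmptySnap(snap) // no snapshot yet: take one immediately

        if chk, err := s.Checkpoint(); err == nil {
            if first, err := s.FirstIndex(); err == nil {
                calculate = calculate || chk >= first+snapshotAfter
            }
        }
        return calculate, nil
    }

Because either branch of this condition already justifies a snapshot on its own, the patch can drop the discardN hint and call proposeSnapshot(0), as the new in-code comment explains.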