Skip to content

Commit a2a40c5

Browse files
authored
Report terms and version if cluster does not form (elastic#37473)
Adds the node's current term and the term and version of the last-accepted cluster state to the message reported by the `ClusterFormationFailureHelper`, since these values may be of importance when tracking down a cluster formation failure.
1 parent 7c11b05 commit a2a40c5

File tree

3 files changed

+68
-47
lines changed

3 files changed

+68
-47
lines changed

server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,22 +117,25 @@ static class ClusterFormationState {
117117
private final ClusterState clusterState;
118118
private final List<TransportAddress> resolvedAddresses;
119119
private final List<DiscoveryNode> foundPeers;
120+
private final long currentTerm;
120121

121122
ClusterFormationState(Settings settings, ClusterState clusterState, List<TransportAddress> resolvedAddresses,
122-
List<DiscoveryNode> foundPeers) {
123+
List<DiscoveryNode> foundPeers, long currentTerm) {
123124
this.settings = settings;
124125
this.clusterState = clusterState;
125126
this.resolvedAddresses = resolvedAddresses;
126127
this.foundPeers = foundPeers;
128+
this.currentTerm = currentTerm;
127129
}
128130

129131
String getDescription() {
130132
final List<String> clusterStateNodes
131133
= StreamSupport.stream(clusterState.nodes().spliterator(), false).map(DiscoveryNode::toString).collect(Collectors.toList());
132134

133135
final String discoveryWillContinueDescription = String.format(Locale.ROOT,
134-
"discovery will continue using %s from hosts providers and %s from last-known cluster state",
135-
resolvedAddresses, clusterStateNodes);
136+
"discovery will continue using %s from hosts providers and %s from last-known cluster state; " +
137+
"node term %d, last-accepted version %d in term %d",
138+
resolvedAddresses, clusterStateNodes, currentTerm, clusterState.version(), clusterState.term());
136139

137140
final String discoveryStateIgnoringQuorum = String.format(Locale.ROOT, "have discovered %s; %s",
138141
foundPeers, discoveryWillContinueDescription);

server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ public Coordinator(String nodeName, Settings settings, ClusterSettings clusterSe
182182

183183
private ClusterFormationState getClusterFormationState() {
184184
return new ClusterFormationState(settings, getStateForMasterService(), peerFinder.getLastResolvedAddresses(),
185-
StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false).collect(Collectors.toList()));
185+
StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false).collect(Collectors.toList()), getCurrentTerm());
186186
}
187187

188188
private Runnable getOnLeaderFailure() {

server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java

Lines changed: 61 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ public void testScheduling() {
6868
final ClusterFormationFailureHelper clusterFormationFailureHelper = new ClusterFormationFailureHelper(settingsBuilder.build(),
6969
() -> {
7070
warningCount.incrementAndGet();
71-
return new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList());
71+
return new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 0L);
7272
},
7373
deterministicTaskQueue.getThreadPool());
7474

@@ -131,51 +131,57 @@ public void testScheduling() {
131131
public void testDescriptionOnMasterIneligibleNodes() {
132132
final DiscoveryNode localNode = new DiscoveryNode("local", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
133133
final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
134-
.nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())).build();
134+
.version(12L).nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())).build();
135135

136-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList()).getDescription(),
136+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 15L).getDescription(),
137137
is("master not discovered yet: have discovered []; discovery will continue using [] from hosts providers and [" + localNode +
138-
"] from last-known cluster state"));
138+
"] from last-known cluster state; node term 15, last-accepted version 12 in term 0"));
139139

140140
final TransportAddress otherAddress = buildNewFakeTransportAddress();
141-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList()).getDescription(),
141+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList(), 16L).getDescription(),
142142
is("master not discovered yet: have discovered []; discovery will continue using [" + otherAddress +
143-
"] from hosts providers and [" + localNode + "] from last-known cluster state"));
143+
"] from hosts providers and [" + localNode +
144+
"] from last-known cluster state; node term 16, last-accepted version 12 in term 0"));
144145

145146
final DiscoveryNode otherNode = new DiscoveryNode("other", buildNewFakeTransportAddress(), Version.CURRENT);
146-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode)).getDescription(),
147+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode), 17L).getDescription(),
147148
is("master not discovered yet: have discovered [" + otherNode + "]; discovery will continue using [] from hosts providers and ["
148-
+ localNode + "] from last-known cluster state"));
149+
+ localNode + "] from last-known cluster state; node term 17, last-accepted version 12 in term 0"));
149150
}
150151

151152
public void testDescriptionBeforeBootstrapping() {
152153
final DiscoveryNode localNode = new DiscoveryNode("local", buildNewFakeTransportAddress(), Version.CURRENT);
153154
final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
155+
.version(7L)
156+
.metaData(MetaData.builder().coordinationMetaData(CoordinationMetaData.builder().term(4L).build()))
154157
.nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())).build();
155158

156-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList()).getDescription(),
159+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 1L).getDescription(),
157160
is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " +
158161
"[cluster.initial_master_nodes] is empty on this node: have discovered []; " +
159-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
162+
"discovery will continue using [] from hosts providers and [" + localNode +
163+
"] from last-known cluster state; node term 1, last-accepted version 7 in term 4"));
160164

161165
final TransportAddress otherAddress = buildNewFakeTransportAddress();
162-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList()).getDescription(),
166+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList(), 2L).getDescription(),
163167
is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " +
164168
"[cluster.initial_master_nodes] is empty on this node: have discovered []; " +
165169
"discovery will continue using [" + otherAddress + "] from hosts providers and [" + localNode +
166-
"] from last-known cluster state"));
170+
"] from last-known cluster state; node term 2, last-accepted version 7 in term 4"));
167171

168172
final DiscoveryNode otherNode = new DiscoveryNode("other", buildNewFakeTransportAddress(), Version.CURRENT);
169-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode)).getDescription(),
173+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode), 3L).getDescription(),
170174
is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " +
171175
"[cluster.initial_master_nodes] is empty on this node: have discovered [" + otherNode + "]; " +
172-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
176+
"discovery will continue using [] from hosts providers and [" + localNode +
177+
"] from last-known cluster state; node term 3, last-accepted version 7 in term 4"));
173178

174179
assertThat(new ClusterFormationState(Settings.builder().putList(INITIAL_MASTER_NODES_SETTING.getKey(), "other").build(),
175-
clusterState, emptyList(), emptyList()).getDescription(),
180+
clusterState, emptyList(), emptyList(), 4L).getDescription(),
176181
is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " +
177182
"this node must discover master-eligible nodes [other] to bootstrap a cluster: have discovered []; " +
178-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
183+
"discovery will continue using [] from hosts providers and [" + localNode +
184+
"] from last-known cluster state; node term 4, last-accepted version 7 in term 4"));
179185
}
180186

181187
private static VotingConfiguration config(String[] nodeIds) {
@@ -199,75 +205,87 @@ public void testDescriptionAfterBootstrapping() {
199205

200206
final ClusterState clusterState = state(localNode, "otherNode");
201207

202-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList()).getDescription(),
208+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 0L).getDescription(),
203209
is("master not discovered or elected yet, an election requires a node with id [otherNode], " +
204210
"have discovered [] which is not a quorum; " +
205-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
211+
"discovery will continue using [] from hosts providers and [" + localNode +
212+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
206213

207214
final TransportAddress otherAddress = buildNewFakeTransportAddress();
208-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList()).getDescription(),
215+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList(), 0L).getDescription(),
209216
is("master not discovered or elected yet, an election requires a node with id [otherNode], " +
210217
"have discovered [] which is not a quorum; " +
211218
"discovery will continue using [" + otherAddress + "] from hosts providers and [" + localNode +
212-
"] from last-known cluster state"));
219+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
213220

214221
final DiscoveryNode otherNode = new DiscoveryNode("otherNode", buildNewFakeTransportAddress(), Version.CURRENT);
215-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode)).getDescription(),
222+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode), 0L).getDescription(),
216223
is("master not discovered or elected yet, an election requires a node with id [otherNode], " +
217224
"have discovered [" + otherNode + "] which is a quorum; " +
218-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
225+
"discovery will continue using [] from hosts providers and [" + localNode +
226+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
219227

220228
final DiscoveryNode yetAnotherNode = new DiscoveryNode("yetAnotherNode", buildNewFakeTransportAddress(), Version.CURRENT);
221-
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(yetAnotherNode)).getDescription(),
229+
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(yetAnotherNode), 0L).getDescription(),
222230
is("master not discovered or elected yet, an election requires a node with id [otherNode], " +
223231
"have discovered [" + yetAnotherNode + "] which is not a quorum; " +
224-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
232+
"discovery will continue using [] from hosts providers and [" + localNode +
233+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
225234

226-
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2"), emptyList(), emptyList()).getDescription(),
235+
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2"), emptyList(), emptyList(), 0L).getDescription(),
227236
is("master not discovered or elected yet, an election requires two nodes with ids [n1, n2], " +
228237
"have discovered [] which is not a quorum; " +
229-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
238+
"discovery will continue using [] from hosts providers and [" + localNode +
239+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
230240

231-
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3"), emptyList(), emptyList()).getDescription(),
241+
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3"), emptyList(), emptyList(), 0L)
242+
.getDescription(),
232243
is("master not discovered or elected yet, an election requires at least 2 nodes with ids from [n1, n2, n3], " +
233244
"have discovered [] which is not a quorum; " +
234-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
245+
"discovery will continue using [] from hosts providers and [" + localNode +
246+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
235247

236-
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3", "n4"), emptyList(), emptyList())
248+
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3", "n4"), emptyList(), emptyList(), 0L)
237249
.getDescription(),
238250
is("master not discovered or elected yet, an election requires at least 3 nodes with ids from [n1, n2, n3, n4], " +
239251
"have discovered [] which is not a quorum; " +
240-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
252+
"discovery will continue using [] from hosts providers and [" + localNode +
253+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
241254

242-
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3", "n4", "n5"), emptyList(), emptyList())
255+
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3", "n4", "n5"), emptyList(), emptyList(), 0L)
243256
.getDescription(),
244257
is("master not discovered or elected yet, an election requires at least 3 nodes with ids from [n1, n2, n3, n4, n5], " +
245258
"have discovered [] which is not a quorum; " +
246-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
259+
"discovery will continue using [] from hosts providers and [" + localNode +
260+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
247261

248-
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n1"}),
249-
emptyList(), emptyList()).getDescription(),
262+
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n1"}), emptyList(),
263+
emptyList(), 0L).getDescription(),
250264
is("master not discovered or elected yet, an election requires a node with id [n1], " +
251265
"have discovered [] which is not a quorum; " +
252-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
266+
"discovery will continue using [] from hosts providers and [" + localNode +
267+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
253268

254-
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2"}),
255-
emptyList(), emptyList()).getDescription(),
269+
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2"}), emptyList(),
270+
emptyList(), 0L).getDescription(),
256271
is("master not discovered or elected yet, an election requires a node with id [n1] and a node with id [n2], " +
257272
"have discovered [] which is not a quorum; " +
258-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
273+
"discovery will continue using [] from hosts providers and [" + localNode +
274+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
259275

260-
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2", "n3"}),
261-
emptyList(), emptyList()).getDescription(),
276+
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2", "n3"}), emptyList(),
277+
emptyList(), 0L).getDescription(),
262278
is("master not discovered or elected yet, an election requires a node with id [n1] and two nodes with ids [n2, n3], " +
263279
"have discovered [] which is not a quorum; " +
264-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
280+
"discovery will continue using [] from hosts providers and [" + localNode +
281+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
265282

266283
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2", "n3", "n4"}),
267-
emptyList(), emptyList()).getDescription(),
284+
emptyList(), emptyList(), 0L).getDescription(),
268285
is("master not discovered or elected yet, an election requires a node with id [n1] and " +
269286
"at least 2 nodes with ids from [n2, n3, n4], " +
270287
"have discovered [] which is not a quorum; " +
271-
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state"));
288+
"discovery will continue using [] from hosts providers and [" + localNode +
289+
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
272290
}
273291
}

0 commit comments

Comments
 (0)