Skip to content

Commit 3de609e

Browse files
authored
Fix NOT_STARTED statuses appearing inappropirately during node shutdown (#75750) (#76634) (#76669)
* Fix `NOT_STARTED` statuses appearing inappropirately during node shutdown (#75750) This PR fixes two situations where `NOT_STARTED` can appear as the shard migration status inappropriately: 1. When the node is actually shut down after having all the shards migrate away. 2. When a non-data-node is registered for shutdown. It also adds tests to ensure these cases are handled correctly. * Fix compilation for backport * Fix compilation for backport
1 parent 1cc5e32 commit 3de609e

File tree

8 files changed

+357
-28
lines changed

8 files changed

+357
-28
lines changed

server/src/main/java/org/elasticsearch/cluster/metadata/SingleNodeShutdownMetadata.java

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
3939
public static final String STARTED_AT_READABLE_FIELD = "shutdown_started";
4040
public static final ParseField STARTED_AT_MILLIS_FIELD = new ParseField(STARTED_AT_READABLE_FIELD + "millis");
4141
public static final ParseField ALLOCATION_DELAY_FIELD = new ParseField("allocation_delay");
42+
public static final ParseField NODE_SEEN_FIELD = new ParseField("node_seen");
4243

4344
public static final ConstructingObjectParser<SingleNodeShutdownMetadata, Void> PARSER = new ConstructingObjectParser<>(
4445
"node_shutdown_info",
@@ -47,7 +48,8 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
4748
Type.valueOf((String) a[1]),
4849
(String) a[2],
4950
(long) a[3],
50-
(TimeValue) a[4]
51+
(boolean) a[4],
52+
(TimeValue) a[5]
5153
)
5254
);
5355

@@ -56,6 +58,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
5658
PARSER.declareString(ConstructingObjectParser.constructorArg(), TYPE_FIELD);
5759
PARSER.declareString(ConstructingObjectParser.constructorArg(), REASON_FIELD);
5860
PARSER.declareLong(ConstructingObjectParser.constructorArg(), STARTED_AT_MILLIS_FIELD);
61+
PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), NODE_SEEN_FIELD);
5962
PARSER.declareField(
6063
ConstructingObjectParser.optionalConstructorArg(),
6164
(p, c) -> TimeValue.parseTimeValue(p.textOrNull(), ALLOCATION_DELAY_FIELD.getPreferredName()), ALLOCATION_DELAY_FIELD,
@@ -73,6 +76,7 @@ public static SingleNodeShutdownMetadata parse(XContentParser parser) {
7376
private final Type type;
7477
private final String reason;
7578
private final long startedAtMillis;
79+
private final boolean nodeSeen;
7680
@Nullable private final TimeValue allocationDelay;
7781

7882
/**
@@ -86,12 +90,14 @@ private SingleNodeShutdownMetadata(
8690
Type type,
8791
String reason,
8892
long startedAtMillis,
93+
boolean nodeSeen,
8994
@Nullable TimeValue allocationDelay
9095
) {
9196
this.nodeId = Objects.requireNonNull(nodeId, "node ID must not be null");
9297
this.type = Objects.requireNonNull(type, "shutdown type must not be null");
9398
this.reason = Objects.requireNonNull(reason, "shutdown reason must not be null");
9499
this.startedAtMillis = startedAtMillis;
100+
this.nodeSeen = nodeSeen;
95101
if (allocationDelay != null && Type.RESTART.equals(type) == false) {
96102
throw new IllegalArgumentException("shard allocation delay is only valid for RESTART-type shutdowns");
97103
}
@@ -103,6 +109,7 @@ public SingleNodeShutdownMetadata(StreamInput in) throws IOException {
103109
this.type = in.readEnum(Type.class);
104110
this.reason = in.readString();
105111
this.startedAtMillis = in.readVLong();
112+
this.nodeSeen = in.readBoolean();
106113
this.allocationDelay = in.readOptionalTimeValue();
107114
}
108115

@@ -134,6 +141,13 @@ public long getStartedAtMillis() {
134141
return startedAtMillis;
135142
}
136143

144+
/**
145+
* @return A boolean indicated whether this node has been seen in the cluster since the shutdown was registered.
146+
*/
147+
public boolean getNodeSeen() {
148+
return nodeSeen;
149+
}
150+
137151
/**
138152
* @return The amount of time shard reallocation should be delayed for shards on this node, so that they will not be automatically
139153
* reassigned while the node is restarting. Will be {@code null} for non-restart shutdowns.
@@ -154,6 +168,7 @@ public void writeTo(StreamOutput out) throws IOException {
154168
out.writeEnum(type);
155169
out.writeString(reason);
156170
out.writeVLong(startedAtMillis);
171+
out.writeBoolean(nodeSeen);
157172
out.writeOptionalTimeValue(allocationDelay);
158173
}
159174

@@ -165,6 +180,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
165180
builder.field(TYPE_FIELD.getPreferredName(), type);
166181
builder.field(REASON_FIELD.getPreferredName(), reason);
167182
builder.timeField(STARTED_AT_MILLIS_FIELD.getPreferredName(), STARTED_AT_READABLE_FIELD, startedAtMillis);
183+
builder.field(NODE_SEEN_FIELD.getPreferredName(), nodeSeen);
168184
if (allocationDelay != null) {
169185
builder.field(ALLOCATION_DELAY_FIELD.getPreferredName(), allocationDelay.getStringRep());
170186
}
@@ -183,6 +199,7 @@ public boolean equals(Object o) {
183199
&& getNodeId().equals(that.getNodeId())
184200
&& getType() == that.getType()
185201
&& getReason().equals(that.getReason())
202+
&& getNodeSeen() == that.getNodeSeen()
186203
&& Objects.equals(allocationDelay, that.allocationDelay);
187204
}
188205

@@ -193,6 +210,7 @@ public int hashCode() {
193210
getType(),
194211
getReason(),
195212
getStartedAtMillis(),
213+
getNodeSeen(),
196214
allocationDelay
197215
);
198216
}
@@ -209,14 +227,16 @@ public static Builder builder(SingleNodeShutdownMetadata original) {
209227
.setNodeId(original.getNodeId())
210228
.setType(original.getType())
211229
.setReason(original.getReason())
212-
.setStartedAtMillis(original.getStartedAtMillis());
230+
.setStartedAtMillis(original.getStartedAtMillis())
231+
.setNodeSeen(original.getNodeSeen());
213232
}
214233

215234
public static class Builder {
216235
private String nodeId;
217236
private Type type;
218237
private String reason;
219238
private long startedAtMillis = -1;
239+
private boolean nodeSeen = false;
220240
private TimeValue allocationDelay;
221241

222242
private Builder() {}
@@ -257,6 +277,15 @@ public Builder setStartedAtMillis(long startedAtMillis) {
257277
return this;
258278
}
259279

280+
/**
281+
* @param nodeSeen Whether or not the node has been seen since the shutdown was registered.
282+
* @return This builder.
283+
*/
284+
public Builder setNodeSeen(boolean nodeSeen) {
285+
this.nodeSeen = nodeSeen;
286+
return this;
287+
}
288+
260289
/**
261290
* @param allocationDelay The amount of time shard reallocation should be delayed while this node is offline.
262291
* @return This builder.
@@ -275,7 +304,9 @@ public SingleNodeShutdownMetadata build() {
275304
nodeId,
276305
type,
277306
reason,
278-
startedAtMillis, allocationDelay
307+
startedAtMillis,
308+
nodeSeen,
309+
allocationDelay
279310
);
280311
}
281312
}

server/src/test/java/org/elasticsearch/cluster/metadata/NodesShutdownMetadataTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ private SingleNodeShutdownMetadata randomNodeShutdownInfo() {
8888
if (type.equals(SingleNodeShutdownMetadata.Type.RESTART) && randomBoolean()) {
8989
builder.setAllocationDelay(TimeValue.parseTimeValue(randomTimeValue(), this.getTestName()));
9090
}
91-
return builder.build();
91+
return builder.setNodeSeen(randomBoolean())
92+
.build();
9293
}
9394

9495
@Override
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.shutdown;
9+
10+
import org.elasticsearch.Build;
11+
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
12+
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
13+
import org.elasticsearch.action.support.master.AcknowledgedResponse;
14+
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata;
15+
import org.elasticsearch.cluster.node.DiscoveryNode;
16+
import org.elasticsearch.common.settings.Settings;
17+
import org.elasticsearch.plugins.Plugin;
18+
import org.elasticsearch.test.ESIntegTestCase;
19+
import org.elasticsearch.test.InternalTestCluster;
20+
21+
import java.util.Arrays;
22+
import java.util.Collection;
23+
24+
import static org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Status.COMPLETE;
25+
import static org.hamcrest.Matchers.equalTo;
26+
27+
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0, transportClientRatio = 0)
28+
public class NodeShutdownShardsIT extends ESIntegTestCase {
29+
30+
@Override
31+
protected Collection<Class<? extends Plugin>> nodePlugins() {
32+
return Arrays.asList(ShutdownPlugin.class);
33+
}
34+
35+
/**
36+
* Verifies that a node that's removed from the cluster with zero shards stays in the `COMPLETE` status after it leaves, rather than
37+
* reverting to `NOT_STARTED` (this was a bug in the initial implementation).
38+
*/
39+
public void testShardStatusStaysCompleteAfterNodeLeaves() throws Exception {
40+
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
41+
final String nodeToRestartName = internalCluster().startNode();
42+
final String nodeToRestartId = getNodeId(nodeToRestartName);
43+
internalCluster().startNode();
44+
45+
// Mark the node for shutdown
46+
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
47+
nodeToRestartId,
48+
SingleNodeShutdownMetadata.Type.REMOVE,
49+
this.getTestName(),
50+
null
51+
);
52+
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
53+
assertTrue(putShutdownResponse.isAcknowledged());
54+
55+
internalCluster().stopNode(nodeToRestartName);
56+
57+
NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
58+
assertThat(nodes.getNodes().size(), equalTo(1));
59+
60+
GetShutdownStatusAction.Response getResp = client().execute(
61+
GetShutdownStatusAction.INSTANCE,
62+
new GetShutdownStatusAction.Request(nodeToRestartId)
63+
).get();
64+
65+
assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
66+
}
67+
68+
/**
69+
* Similar to the previous test, but ensures that the status stays at `COMPLETE` when the node is offline when the shutdown is
70+
* registered. This may happen if {@link NodeSeenService} isn't working as expected.
71+
*/
72+
public void testShardStatusStaysCompleteAfterNodeLeavesIfRegisteredWhileNodeOffline() throws Exception {
73+
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
74+
final String nodeToRestartName = internalCluster().startNode();
75+
final String nodeToRestartId = getNodeId(nodeToRestartName);
76+
internalCluster().startNode();
77+
78+
// Stop the node we're going to shut down and mark it as shutting down while it's offline. This checks that the cluster state
79+
// listener is working correctly.
80+
internalCluster().restartNode(nodeToRestartName, new InternalTestCluster.RestartCallback() {
81+
@Override
82+
public Settings onNodeStopped(String nodeName) throws Exception {
83+
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
84+
nodeToRestartId,
85+
SingleNodeShutdownMetadata.Type.REMOVE,
86+
"testShardStatusStaysCompleteAfterNodeLeavesIfRegisteredWhileNodeOffline",
87+
null
88+
);
89+
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
90+
assertTrue(putShutdownResponse.isAcknowledged());
91+
92+
return super.onNodeStopped(nodeName);
93+
}
94+
});
95+
96+
internalCluster().stopNode(nodeToRestartName);
97+
98+
NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
99+
assertThat(nodes.getNodes().size(), equalTo(1));
100+
101+
GetShutdownStatusAction.Response getResp = client().execute(
102+
GetShutdownStatusAction.INSTANCE,
103+
new GetShutdownStatusAction.Request(nodeToRestartId)
104+
).get();
105+
106+
assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
107+
}
108+
109+
/**
110+
* Checks that non-data nodes that are registered for shutdown have a shard migration status of `COMPLETE` rather than `NOT_STARTED`.
111+
* (this was a bug in the initial implementation).
112+
*/
113+
public void testShardStatusIsCompleteOnNonDataNodes() throws Exception {
114+
assumeTrue("must be on a snapshot build of ES to run in order for the feature flag to be set", Build.CURRENT.isSnapshot());
115+
final String nodeToShutDownName = internalCluster().startMasterOnlyNode();
116+
internalCluster().startMasterOnlyNode(); // Just to have at least one other node
117+
final String nodeToRestartId = getNodeId(nodeToShutDownName);
118+
119+
// Mark the node for shutdown
120+
PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
121+
nodeToRestartId,
122+
SingleNodeShutdownMetadata.Type.REMOVE,
123+
this.getTestName(),
124+
null
125+
);
126+
AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
127+
assertTrue(putShutdownResponse.isAcknowledged());
128+
129+
GetShutdownStatusAction.Response getResp = client().execute(
130+
GetShutdownStatusAction.INSTANCE,
131+
new GetShutdownStatusAction.Request(nodeToRestartId)
132+
).get();
133+
134+
assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
135+
}
136+
137+
private String getNodeId(String nodeName) throws Exception {
138+
NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
139+
return nodes.getNodes()
140+
.stream()
141+
.map(NodeInfo::getNode)
142+
.filter(node -> node.getName().equals(nodeName))
143+
.map(DiscoveryNode::getId)
144+
.findFirst()
145+
.orElseThrow(() -> new AssertionError("requested node name [" + nodeName + "] not found"));
146+
}
147+
}

0 commit comments

Comments
 (0)