Skip to content

Commit 8939a78

Browse files
authored
Zen2: Move disruption tests to Zen2 (#35724)
- Moves disruption tests to Zen2.
- Registers a few missing settings.
- Removes `.put(TestZenDiscovery.USE_ZEN2.getKey(), true)` from tests where Zen2 is now enabled by default through the parent test class.
- Moves QuorumGatewayIT back to Zen1: it is not stable with Zen2 because it currently relies on dangling indices due to the lack of proper cluster state (CS) persistence, which triggers secondary failures.
1 parent 47ada69 commit 8939a78

File tree

10 files changed

+68
-58
lines changed

10 files changed

+68
-58
lines changed

server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
import org.elasticsearch.cluster.coordination.ClusterBootstrapService;
3636
import org.elasticsearch.cluster.coordination.Coordinator;
3737
import org.elasticsearch.cluster.coordination.ElectionSchedulerFactory;
38+
import org.elasticsearch.cluster.coordination.FollowersChecker;
3839
import org.elasticsearch.cluster.coordination.JoinHelper;
40+
import org.elasticsearch.cluster.coordination.LeaderChecker;
3941
import org.elasticsearch.cluster.coordination.Reconfigurator;
4042
import org.elasticsearch.cluster.metadata.IndexGraveyard;
4143
import org.elasticsearch.cluster.metadata.MetaData;
@@ -459,6 +461,12 @@ public void apply(Settings value, Settings current, Settings previous) {
459461
ElectionSchedulerFactory.ELECTION_DURATION_SETTING,
460462
Coordinator.PUBLISH_TIMEOUT_SETTING,
461463
JoinHelper.JOIN_TIMEOUT_SETTING,
464+
FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING,
465+
FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING,
466+
FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING,
467+
LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING,
468+
LeaderChecker.LEADER_CHECK_INTERVAL_SETTING,
469+
LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING,
462470
Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION,
463471
TransportAddVotingTombstonesAction.MAXIMUM_VOTING_TOMBSTONES_SETTING,
464472
ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING

server/src/test/java/org/elasticsearch/action/admin/indices/stats/IndicesStatsBlocksIT.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,8 @@
2121

2222
import org.elasticsearch.cluster.block.ClusterBlockException;
2323
import org.elasticsearch.cluster.metadata.IndexMetaData;
24-
import org.elasticsearch.common.settings.Settings;
2524
import org.elasticsearch.test.ESIntegTestCase;
2625
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
27-
import org.elasticsearch.test.discovery.TestZenDiscovery;
2826

2927
import java.util.Arrays;
3028

@@ -36,13 +34,6 @@
3634
@ClusterScope(scope = ESIntegTestCase.Scope.TEST)
3735
public class IndicesStatsBlocksIT extends ESIntegTestCase {
3836

39-
@Override
40-
protected Settings nodeSettings(int nodeOrdinal) {
41-
return Settings.builder().put(super.nodeSettings(nodeOrdinal))
42-
.put(TestZenDiscovery.USE_ZEN2.getKey(), true)
43-
.build();
44-
}
45-
4637
public void testIndicesStatsWithBlocks() {
4738
createIndex("ro");
4839
ensureGreen("ro");

server/src/test/java/org/elasticsearch/cluster/routing/AllocationIdIT.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
import org.elasticsearch.test.ESIntegTestCase;
4646
import org.elasticsearch.test.InternalSettingsPlugin;
4747
import org.elasticsearch.test.InternalTestCluster;
48-
import org.elasticsearch.test.discovery.TestZenDiscovery;
4948
import org.elasticsearch.test.transport.MockTransportService;
5049

5150
import java.io.IOException;
@@ -67,13 +66,6 @@
6766
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 0)
6867
public class AllocationIdIT extends ESIntegTestCase {
6968

70-
@Override
71-
protected Settings nodeSettings(int nodeOrdinal) {
72-
return Settings.builder().put(super.nodeSettings(nodeOrdinal))
73-
.put(TestZenDiscovery.USE_ZEN2.getKey(), true)
74-
.build();
75-
}
76-
7769
@Override
7870
protected Collection<Class<? extends Plugin>> nodePlugins() {
7971
return Arrays.asList(MockTransportService.TestPlugin.class, MockEngineFactoryPlugin.class, InternalSettingsPlugin.class);

server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
import org.elasticsearch.cluster.ClusterState;
2323
import org.elasticsearch.cluster.block.ClusterBlock;
2424
import org.elasticsearch.cluster.block.ClusterBlockLevel;
25+
import org.elasticsearch.cluster.coordination.Coordinator;
26+
import org.elasticsearch.cluster.coordination.FollowersChecker;
27+
import org.elasticsearch.cluster.coordination.JoinHelper;
28+
import org.elasticsearch.cluster.coordination.LeaderChecker;
2529
import org.elasticsearch.cluster.node.DiscoveryNodes;
2630
import org.elasticsearch.common.Nullable;
2731
import org.elasticsearch.common.settings.Settings;
@@ -63,7 +67,6 @@ public abstract class AbstractDisruptionTestCase extends ESIntegTestCase {
6367
@Override
6468
protected Settings nodeSettings(int nodeOrdinal) {
6569
return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(DEFAULT_SETTINGS)
66-
.put(TestZenDiscovery.USE_ZEN2.getKey(), false) // requires more work
6770
.put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false).build();
6871
}
6972

@@ -114,18 +117,31 @@ List<String> startCluster(int numberOfNodes) {
114117
ensureStableCluster(numberOfNodes);
115118

116119
// TODO: this is a temporary solution so that nodes will not base their reaction to a partition on previous successful results
117-
ZenPing zenPing = ((TestZenDiscovery) internalCluster().getInstance(Discovery.class)).getZenPing();
118-
if (zenPing instanceof UnicastZenPing) {
119-
((UnicastZenPing) zenPing).clearTemporalResponses();
120-
}
120+
clearTemporalResponses();
121121
return nodes;
122122
}
123123

124+
protected void clearTemporalResponses() {
125+
final Discovery discovery = internalCluster().getInstance(Discovery.class);
126+
if (discovery instanceof TestZenDiscovery) {
127+
ZenPing zenPing = ((TestZenDiscovery) discovery).getZenPing();
128+
if (zenPing instanceof UnicastZenPing) {
129+
((UnicastZenPing) zenPing).clearTemporalResponses();
130+
}
131+
}
132+
}
133+
124134
static final Settings DEFAULT_SETTINGS = Settings.builder()
125135
.put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
126136
.put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1") // for hitting simulated network failures quickly
137+
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
138+
.put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
139+
.put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
140+
.put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
127141
.put("discovery.zen.join_timeout", "10s") // still long enough to induce failures, but not so long that the test times out
142+
.put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long enough to induce failures, but not so long that the test times out
128143
.put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly
144+
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly
129145
.put(TransportService.TCP_CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this
130146
// value and the time of disruption and does not recover immediately
131147
// when disruption is stop. We should make sure we recover faster

server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.elasticsearch.common.xcontent.XContentType;
3838
import org.elasticsearch.test.ESIntegTestCase;
3939
import org.elasticsearch.test.InternalTestCluster;
40+
import org.elasticsearch.test.discovery.TestZenDiscovery;
4041
import org.elasticsearch.test.disruption.NetworkDisruption;
4142
import org.elasticsearch.test.disruption.NetworkDisruption.Bridge;
4243
import org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect;
@@ -356,8 +357,9 @@ public void onFailure(Exception e) {
356357

357358
public void testIndexImportedFromDataOnlyNodesIfMasterLostDataFolder() throws Exception {
358359
// test for https://github.com/elastic/elasticsearch/issues/8823
359-
String masterNode = internalCluster().startMasterOnlyNode(Settings.EMPTY);
360-
internalCluster().startDataOnlyNode(Settings.EMPTY);
360+
Settings zen1Settings = Settings.builder().put(TestZenDiscovery.USE_ZEN2.getKey(), false).build(); // TODO: needs adaptions for Zen2
361+
String masterNode = internalCluster().startMasterOnlyNode(zen1Settings);
362+
internalCluster().startDataOnlyNode(zen1Settings);
361363
ensureStableCluster(2);
362364
assertAcked(prepareCreate("index").setSettings(Settings.builder().put("index.number_of_replicas", 0)));
363365
index("index", "_doc", "1", jsonBuilder().startObject().field("text", "some text").endObject());

server/src/test/java/org/elasticsearch/discovery/DiscoveryDisruptionIT.java

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,16 @@
2020
package org.elasticsearch.discovery;
2121

2222
import org.elasticsearch.cluster.ClusterState;
23+
import org.elasticsearch.cluster.coordination.JoinHelper;
24+
import org.elasticsearch.cluster.coordination.PublicationTransportHandler;
2325
import org.elasticsearch.cluster.node.DiscoveryNode;
2426
import org.elasticsearch.cluster.node.DiscoveryNodes;
2527
import org.elasticsearch.cluster.service.ClusterService;
2628
import org.elasticsearch.common.settings.Settings;
2729
import org.elasticsearch.discovery.zen.MembershipAction;
2830
import org.elasticsearch.discovery.zen.PublishClusterStateAction;
29-
import org.elasticsearch.discovery.zen.UnicastZenPing;
30-
import org.elasticsearch.discovery.zen.ZenPing;
31+
import org.elasticsearch.discovery.zen.ZenDiscovery;
3132
import org.elasticsearch.test.ESIntegTestCase;
32-
import org.elasticsearch.test.discovery.TestZenDiscovery;
3333
import org.elasticsearch.test.disruption.NetworkDisruption;
3434
import org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect;
3535
import org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions;
@@ -73,10 +73,7 @@ public void testIsolatedUnicastNodes() throws Exception {
7373

7474
// Forcefully clean temporal response lists on all nodes. Otherwise the node in the unicast host list
7575
// includes all the other nodes that have pinged it and the issue doesn't manifest
76-
ZenPing zenPing = ((TestZenDiscovery) internalCluster().getInstance(Discovery.class)).getZenPing();
77-
if (zenPing instanceof UnicastZenPing) {
78-
((UnicastZenPing) zenPing).clearTemporalResponses();
79-
}
76+
clearTemporalResponses();
8077

8178
// Simulate a network issue between the unicast target node and the rest of the cluster
8279
NetworkDisruption networkDisconnect = new NetworkDisruption(new TwoPartitions(unicastTargetSide, restOfClusterSide),
@@ -111,10 +108,7 @@ public void testUnicastSinglePingResponseContainsMaster() throws Exception {
111108

112109
// Forcefully clean temporal response lists on all nodes. Otherwise the node in the unicast host list
113110
// includes all the other nodes that have pinged it and the issue doesn't manifest
114-
ZenPing zenPing = ((TestZenDiscovery) internalCluster().getInstance(Discovery.class)).getZenPing();
115-
if (zenPing instanceof UnicastZenPing) {
116-
((UnicastZenPing) zenPing).clearTemporalResponses();
117-
}
111+
clearTemporalResponses();
118112

119113
// Simulate a network issue between the unlucky node and elected master node in both directions.
120114
NetworkDisruption networkDisconnect = new NetworkDisruption(new TwoPartitions(masterNode, isolatedNode),
@@ -160,14 +154,17 @@ public void testClusterJoinDespiteOfPublishingIssues() throws Exception {
160154
internalCluster().getInstance(TransportService.class, discoveryNodes.getLocalNode().getName());
161155
if (randomBoolean()) {
162156
masterTransportService.addFailToSendNoConnectRule(localTransportService, PublishClusterStateAction.SEND_ACTION_NAME);
157+
masterTransportService.addFailToSendNoConnectRule(localTransportService, PublicationTransportHandler.PUBLISH_STATE_ACTION_NAME);
163158
} else {
164159
masterTransportService.addFailToSendNoConnectRule(localTransportService, PublishClusterStateAction.COMMIT_ACTION_NAME);
160+
masterTransportService.addFailToSendNoConnectRule(localTransportService, PublicationTransportHandler.COMMIT_STATE_ACTION_NAME);
165161
}
166162

167163
logger.info("allowing requests from non master [{}] to master [{}], waiting for two join request", nonMasterNode, masterNode);
168164
final CountDownLatch countDownLatch = new CountDownLatch(2);
169165
nonMasterTransportService.addSendBehavior(masterTransportService, (connection, requestId, action, request, options) -> {
170-
if (action.equals(MembershipAction.DISCOVERY_JOIN_ACTION_NAME)) {
166+
if (action.equals(MembershipAction.DISCOVERY_JOIN_ACTION_NAME) ||
167+
action.equals(JoinHelper.JOIN_ACTION_NAME)) {
171168
countDownLatch.countDown();
172169
}
173170
connection.sendRequest(requestId, action, request, options);
@@ -219,9 +216,13 @@ public void testElectMasterWithLatestVersion() throws Exception {
219216
ensureStableCluster(3);
220217
final String preferredMasterName = internalCluster().getMasterName();
221218
final DiscoveryNode preferredMaster = internalCluster().clusterService(preferredMasterName).localNode();
222-
for (String node : nodes) {
223-
DiscoveryNode discoveryNode = internalCluster().clusterService(node).localNode();
224-
assertThat(discoveryNode.getId(), greaterThanOrEqualTo(preferredMaster.getId()));
219+
final Discovery discovery = internalCluster().getInstance(Discovery.class);
220+
// only Zen1 guarantees that node with lowest id is elected
221+
if (discovery instanceof ZenDiscovery) {
222+
for (String node : nodes) {
223+
DiscoveryNode discoveryNode = internalCluster().clusterService(node).localNode();
224+
assertThat(discoveryNode.getId(), greaterThanOrEqualTo(preferredMaster.getId()));
225+
}
225226
}
226227

227228
logger.info("--> preferred master is {}", preferredMaster);

server/src/test/java/org/elasticsearch/discovery/MasterDisruptionIT.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.elasticsearch.discovery.zen.ZenDiscovery;
4040
import org.elasticsearch.monitor.jvm.HotThreads;
4141
import org.elasticsearch.test.ESIntegTestCase;
42+
import org.elasticsearch.test.discovery.TestZenDiscovery;
4243
import org.elasticsearch.test.disruption.BlockMasterServiceOnMaster;
4344
import org.elasticsearch.test.disruption.IntermittentLongGCDisruption;
4445
import org.elasticsearch.test.disruption.LongGCDisruption;
@@ -379,7 +380,8 @@ public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception {
379380
* Verify that the proper block is applied when nodes loose their master
380381
*/
381382
public void testVerifyApiBlocksDuringPartition() throws Exception {
382-
startCluster(3);
383+
// TODO: NO_MASTER_BLOCKS not dynamic in Zen2 yet
384+
internalCluster().startNodes(3, Settings.builder().put(TestZenDiscovery.USE_ZEN2.getKey(), false).build());
383385

384386
// Makes sure that the get request can be executed on each node locally:
385387
assertAcked(prepareCreate("test").setSettings(Settings.builder()
@@ -511,7 +513,13 @@ void assertDiscoveryCompleted(List<String> nodes) throws InterruptedException {
511513
assertTrue(
512514
"node [" + node + "] is still joining master",
513515
awaitBusy(
514-
() -> !((ZenDiscovery) internalCluster().getInstance(Discovery.class, node)).joiningCluster(),
516+
() -> {
517+
final Discovery discovery = internalCluster().getInstance(Discovery.class, node);
518+
if (discovery instanceof ZenDiscovery) {
519+
return !((ZenDiscovery) discovery).joiningCluster();
520+
}
521+
return true;
522+
},
515523
30,
516524
TimeUnit.SECONDS
517525
)

server/src/test/java/org/elasticsearch/gateway/QuorumGatewayIT.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
2828
import org.elasticsearch.test.ESIntegTestCase.Scope;
2929
import org.elasticsearch.test.InternalTestCluster.RestartCallback;
30+
import org.elasticsearch.test.discovery.TestZenDiscovery;
3031

3132
import java.util.concurrent.TimeUnit;
3233

@@ -38,6 +39,14 @@
3839

3940
@ClusterScope(numDataNodes = 0, scope = Scope.TEST)
4041
public class QuorumGatewayIT extends ESIntegTestCase {
42+
43+
@Override
44+
protected Settings nodeSettings(int nodeOrdinal) {
45+
return Settings.builder().put(super.nodeSettings(nodeOrdinal))
46+
.put(TestZenDiscovery.USE_ZEN2.getKey(), false) // no state persistence yet
47+
.build();
48+
}
49+
4150
@Override
4251
protected int numberOfReplicas() {
4352
return 2;

server/src/test/java/org/elasticsearch/recovery/RelocationIT.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
import org.elasticsearch.test.ESIntegTestCase.Scope;
5757
import org.elasticsearch.test.InternalSettingsPlugin;
5858
import org.elasticsearch.test.MockIndexEventListener;
59-
import org.elasticsearch.test.discovery.TestZenDiscovery;
6059
import org.elasticsearch.test.junit.annotations.TestLogging;
6160
import org.elasticsearch.test.transport.MockTransportService;
6261
import org.elasticsearch.test.transport.StubbableTransport;
@@ -100,13 +99,6 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
10099
return Arrays.asList(InternalSettingsPlugin.class, MockTransportService.TestPlugin.class, MockIndexEventListener.TestPlugin.class);
101100
}
102101

103-
@Override
104-
protected Settings nodeSettings(int nodeOrdinal) {
105-
return Settings.builder().put(super.nodeSettings(nodeOrdinal))
106-
.put(TestZenDiscovery.USE_ZEN2.getKey(), true)
107-
.build();
108-
}
109-
110102
@Override
111103
protected void beforeIndexDeletion() throws Exception {
112104
super.beforeIndexDeletion();

test/framework/src/test/java/org/elasticsearch/test/test/InternalTestClusterIT.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,8 @@
1818
*/
1919
package org.elasticsearch.test.test;
2020

21-
import org.elasticsearch.common.settings.Settings;
2221
import org.elasticsearch.test.ESIntegTestCase;
2322
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
24-
import org.elasticsearch.test.discovery.TestZenDiscovery;
2523

2624
import java.io.IOException;
2725

@@ -30,13 +28,6 @@
3028
@ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
3129
public class InternalTestClusterIT extends ESIntegTestCase {
3230

33-
@Override
34-
protected Settings nodeSettings(int nodeOrdinal) {
35-
return Settings.builder().put(super.nodeSettings(nodeOrdinal))
36-
.put(TestZenDiscovery.USE_ZEN2.getKey(), true)
37-
.build();
38-
}
39-
4031
public void testStartingAndStoppingNodes() throws IOException {
4132
logger.info("--> cluster has [{}] nodes", internalCluster().size());
4233
if (internalCluster().size() < 5) {

0 commit comments

Comments
 (0)