9696import org .elasticsearch .cluster .ClusterStateListener ;
9797import org .elasticsearch .cluster .ESAllocationTestCase ;
9898import org .elasticsearch .cluster .NodeConnectionsService ;
99+ import org .elasticsearch .cluster .SnapshotDeletionsInProgress ;
99100import org .elasticsearch .cluster .SnapshotsInProgress ;
100101import org .elasticsearch .cluster .action .index .MappingUpdatedAction ;
101102import org .elasticsearch .cluster .action .index .NodeMappingRefreshAction ;
@@ -411,6 +412,49 @@ public void testSnapshotWithNodeDisconnects() {
411412 assertThat (snapshotIds , hasSize (1 ));
412413 }
413414
415+ public void testSnapshotDeleteWithMasterFailover () {
416+ final int dataNodes = randomIntBetween (2 , 10 );
417+ final int masterNodes = randomFrom (3 , 5 );
418+ setupTestCluster (masterNodes , dataNodes );
419+
420+ String repoName = "repo" ;
421+ String snapshotName = "snapshot" ;
422+ final String index = "test" ;
423+ final int shards = randomIntBetween (1 , 10 );
424+
425+ final boolean waitForSnapshot = randomBoolean ();
426+ final StepListener <CreateSnapshotResponse > createSnapshotResponseStepListener = new StepListener <>();
427+ continueOrDie (createRepoAndIndex (repoName , index , shards ), createIndexResponse ->
428+ testClusterNodes .randomMasterNodeSafe ().client .admin ().cluster ().prepareCreateSnapshot (repoName , snapshotName )
429+ .setWaitForCompletion (waitForSnapshot ).execute (createSnapshotResponseStepListener ));
430+
431+ final AtomicBoolean snapshotDeleteResponded = new AtomicBoolean (false );
432+ continueOrDie (createSnapshotResponseStepListener , createSnapshotResponse -> {
433+ scheduleNow (this ::disconnectOrRestartMasterNode );
434+ testClusterNodes .randomDataNodeSafe ().client .admin ().cluster ()
435+ .prepareDeleteSnapshot (repoName , snapshotName ).execute (ActionListener .wrap (() -> snapshotDeleteResponded .set (true )));
436+ });
437+
438+ runUntil (() -> testClusterNodes .randomMasterNode ().map (master -> {
439+ if (snapshotDeleteResponded .get () == false ) {
440+ return false ;
441+ }
442+ final SnapshotDeletionsInProgress snapshotDeletionsInProgress =
443+ master .clusterService .state ().custom (SnapshotDeletionsInProgress .TYPE );
444+ return snapshotDeletionsInProgress == null || snapshotDeletionsInProgress .getEntries ().isEmpty ();
445+ }).orElse (false ), TimeUnit .MINUTES .toMillis (1L ));
446+
447+ clearDisruptionsAndAwaitSync ();
448+
449+ final TestClusterNodes .TestClusterNode randomMaster = testClusterNodes .randomMasterNode ()
450+ .orElseThrow (() -> new AssertionError ("expected to find at least one active master node" ));
451+ SnapshotsInProgress finalSnapshotsInProgress = randomMaster .clusterService .state ().custom (SnapshotsInProgress .TYPE );
452+ assertThat (finalSnapshotsInProgress .entries (), empty ());
453+ final Repository repository = randomMaster .repositoriesService .repository (repoName );
454+ Collection <SnapshotId > snapshotIds = getRepositoryData (repository ).getSnapshotIds ();
455+ assertThat (snapshotIds , hasSize (0 ));
456+ }
457+
414458 public void testConcurrentSnapshotCreateAndDelete () {
415459 setupTestCluster (randomFrom (1 , 3 , 5 ), randomIntBetween (2 , 10 ));
416460
@@ -1159,6 +1203,8 @@ private final class TestClusterNode {
11591203
11601204 private final ClusterService clusterService ;
11611205
1206+ private final NodeConnectionsService nodeConnectionsService ;
1207+
11621208 private final RepositoriesService repositoriesService ;
11631209
11641210 private final SnapshotsService snapshotsService ;
@@ -1310,6 +1356,8 @@ public void onFailure(final Exception e) {
13101356 new BatchedRerouteService (clusterService , allocationService ::reroute ),
13111357 threadPool
13121358 );
1359+ nodeConnectionsService =
1360+ new NodeConnectionsService (clusterService .getSettings (), threadPool , transportService );
13131361 final MetadataMappingService metadataMappingService = new MetadataMappingService (clusterService , indicesService );
13141362 indicesClusterStateService = new IndicesClusterStateService (
13151363 settings ,
@@ -1467,6 +1515,7 @@ public void stop() {
14671515 testClusterNodes .disconnectNode (this );
14681516 indicesService .close ();
14691517 clusterService .close ();
1518+ nodeConnectionsService .stop ();
14701519 indicesClusterStateService .close ();
14711520 if (coordinator != null ) {
14721521 coordinator .close ();
@@ -1491,10 +1540,9 @@ public void start(ClusterState initialState) {
14911540 new BatchedRerouteService (clusterService , allocationService ::reroute ), ElectionStrategy .DEFAULT_INSTANCE );
14921541 masterService .setClusterStatePublisher (coordinator );
14931542 coordinator .start ();
1494- masterService .start ();
1495- clusterService .getClusterApplierService ().setNodeConnectionsService (
1496- new NodeConnectionsService (clusterService .getSettings (), threadPool , transportService ));
1497- clusterService .getClusterApplierService ().start ();
1543+ clusterService .getClusterApplierService ().setNodeConnectionsService (nodeConnectionsService );
1544+ nodeConnectionsService .start ();
1545+ clusterService .start ();
14981546 indicesService .start ();
14991547 indicesClusterStateService .start ();
15001548 coordinator .startInitialJoin ();
0 commit comments