|
19 | 19 |
|
20 | 20 | package org.elasticsearch.cluster.routing.allocation; |
21 | 21 |
|
| 22 | +import com.carrotsearch.hppc.cursors.ObjectCursor; |
22 | 23 | import org.apache.logging.log4j.Logger; |
23 | 24 | import org.elasticsearch.Version; |
24 | 25 | import org.elasticsearch.cluster.ClusterState; |
25 | 26 | import org.elasticsearch.cluster.ESAllocationTestCase; |
26 | 27 | import org.elasticsearch.cluster.metadata.IndexMetaData; |
27 | 28 | import org.elasticsearch.cluster.metadata.MetaData; |
| 29 | +import org.elasticsearch.cluster.node.DiscoveryNode; |
28 | 30 | import org.elasticsearch.cluster.node.DiscoveryNodes; |
29 | 31 | import org.elasticsearch.cluster.routing.RoutingNodes; |
30 | 32 | import org.elasticsearch.cluster.routing.RoutingTable; |
@@ -499,7 +501,7 @@ public void testFailAllReplicasInitializingOnPrimaryFail() { |
499 | 501 | Collections.singletonList(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).get(0))); |
500 | 502 | assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2)); |
501 | 503 | assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); |
502 | | - ShardRouting startedReplica = clusterState.getRoutingNodes().activeReplica(shardId); |
| 504 | + ShardRouting startedReplica = clusterState.getRoutingNodes().activeReplicaWithHighestVersion(shardId); |
503 | 505 |
|
504 | 506 |
|
505 | 507 | // fail the primary shard, check replicas get removed as well... |
@@ -556,4 +558,118 @@ public void testFailAllReplicasInitializingOnPrimaryFailWhileHavingAReplicaToEle |
556 | 558 | ShardRouting newPrimaryShard = clusterState.routingTable().index("test").shard(0).primaryShard(); |
557 | 559 | assertThat(newPrimaryShard, not(equalTo(primaryShardToFail))); |
558 | 560 | } |
| 561 | + |
| 562 | + public void testReplicaOnNewestVersionIsPromoted() { |
| 563 | + AllocationService allocation = createAllocationService(Settings.builder().build()); |
| 564 | + |
| 565 | + MetaData metaData = MetaData.builder().put(IndexMetaData.builder("test") |
| 566 | + .settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(3)) .build(); |
| 567 | + |
| 568 | + RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build(); |
| 569 | + |
| 570 | + ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) |
| 571 | + .metaData(metaData).routingTable(initialRoutingTable).build(); |
| 572 | + |
| 573 | + ShardId shardId = new ShardId(metaData.index("test").getIndex(), 0); |
| 574 | + |
| 575 | + // add a single node |
| 576 | + clusterState = ClusterState.builder(clusterState).nodes( |
| 577 | + DiscoveryNodes.builder() |
| 578 | + .add(newNode("node1-5.x", Version.V_5_6_0))) |
| 579 | + .build(); |
| 580 | + clusterState = ClusterState.builder(clusterState).routingTable(allocation.reroute(clusterState, "reroute").routingTable()).build(); |
| 581 | + assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); |
| 582 | + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(3)); |
| 583 | + |
| 584 | + // start primary shard |
| 585 | + clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); |
| 586 | + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1)); |
| 587 | + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(3)); |
| 588 | + |
| 589 | + // add another 5.6 node |
| 590 | + clusterState = ClusterState.builder(clusterState).nodes( |
| 591 | + DiscoveryNodes.builder(clusterState.nodes()) |
| 592 | + .add(newNode("node2-5.x", Version.V_5_6_0))) |
| 593 | + .build(); |
| 594 | + |
| 595 | + // start the shards, should have 1 primary and 1 replica available |
| 596 | + clusterState = allocation.reroute(clusterState, "reroute"); |
| 597 | + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1)); |
| 598 | + assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); |
| 599 | + clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); |
| 600 | + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2)); |
| 601 | + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(2)); |
| 602 | + |
| 603 | + clusterState = ClusterState.builder(clusterState).nodes( |
| 604 | + DiscoveryNodes.builder(clusterState.nodes()) |
| 605 | + .add(newNode("node3-6.x", Version.V_6_0_0_alpha3)) |
| 606 | + .add(newNode("node4-6.x", Version.V_6_0_0_alpha3))) |
| 607 | + .build(); |
| 608 | + |
| 609 | + // start all the replicas |
| 610 | + clusterState = allocation.reroute(clusterState, "reroute"); |
| 611 | + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2)); |
| 612 | + assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(2)); |
| 613 | + clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); |
| 614 | + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(4)); |
| 615 | + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(0)); |
| 616 | + |
| 617 | + ShardRouting startedReplica = clusterState.getRoutingNodes().activeReplicaWithHighestVersion(shardId); |
| 618 | + logger.info("--> all shards allocated, replica that should be promoted: {}", startedReplica); |
| 619 | + |
| 620 | + // fail the primary shard, check replicas get removed as well... |
| 621 | + ShardRouting primaryShardToFail = clusterState.routingTable().index("test").shard(0).primaryShard(); |
| 622 | + ClusterState newState = allocation.applyFailedShard(clusterState, primaryShardToFail); |
| 623 | + assertThat(newState, not(equalTo(clusterState))); |
| 624 | + clusterState = newState; |
| 625 | + // the primary gets allocated on another node |
| 626 | + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(3)); |
| 627 | + |
| 628 | + ShardRouting newPrimaryShard = clusterState.routingTable().index("test").shard(0).primaryShard(); |
| 629 | + assertThat(newPrimaryShard, not(equalTo(primaryShardToFail))); |
| 630 | + assertThat(newPrimaryShard.allocationId(), equalTo(startedReplica.allocationId())); |
| 631 | + |
| 632 | + Version replicaNodeVersion = clusterState.nodes().getDataNodes().get(startedReplica.currentNodeId()).getVersion(); |
| 633 | + assertNotNull(replicaNodeVersion); |
| 634 | + logger.info("--> shard {} got assigned to node with version {}", startedReplica, replicaNodeVersion); |
| 635 | + |
| 636 | + for (ObjectCursor<DiscoveryNode> cursor : clusterState.nodes().getDataNodes().values()) { |
| 637 | + if ("node1".equals(cursor.value.getId())) { |
| 638 | + // Skip the node that the primary was on, it doesn't have a replica so doesn't need a version check |
| 639 | + continue; |
| 640 | + } |
| 641 | + Version nodeVer = cursor.value.getVersion(); |
| 642 | + assertTrue("expected node [" + cursor.value.getId() + "] with version " + nodeVer + " to be before " + replicaNodeVersion, |
| 643 | + replicaNodeVersion.onOrAfter(nodeVer)); |
| 644 | + } |
| 645 | + |
| 646 | + startedReplica = clusterState.getRoutingNodes().activeReplicaWithHighestVersion(shardId); |
| 647 | + logger.info("--> failing primary shard a second time, should select: {}", startedReplica); |
| 648 | + |
| 649 | + // fail the primary shard again, and ensure the same thing happens |
| 650 | + primaryShardToFail = clusterState.routingTable().index("test").shard(0).primaryShard(); |
| 651 | + newState = allocation.applyFailedShard(clusterState, primaryShardToFail); |
| 652 | + assertThat(newState, not(equalTo(clusterState))); |
| 653 | + clusterState = newState; |
| 654 | + // the primary gets allocated on another node |
| 655 | + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2)); |
| 656 | + |
| 657 | + newPrimaryShard = clusterState.routingTable().index("test").shard(0).primaryShard(); |
| 658 | + assertThat(newPrimaryShard, not(equalTo(primaryShardToFail))); |
| 659 | + assertThat(newPrimaryShard.allocationId(), equalTo(startedReplica.allocationId())); |
| 660 | + |
| 661 | + replicaNodeVersion = clusterState.nodes().getDataNodes().get(startedReplica.currentNodeId()).getVersion(); |
| 662 | + assertNotNull(replicaNodeVersion); |
| 663 | + logger.info("--> shard {} got assigned to node with version {}", startedReplica, replicaNodeVersion); |
| 664 | + |
| 665 | + for (ObjectCursor<DiscoveryNode> cursor : clusterState.nodes().getDataNodes().values()) { |
| 666 | + if ("node1".equals(cursor.value.getId())) { |
| 667 | + // Skip the node that the primary was on, it doesn't have a replica so doesn't need a version check |
| 668 | + continue; |
| 669 | + } |
| 670 | + Version nodeVer = cursor.value.getVersion(); |
| 671 | + assertTrue("expected node [" + cursor.value.getId() + "] with version " + nodeVer + " to be before " + replicaNodeVersion, |
| 672 | + replicaNodeVersion.onOrAfter(nodeVer)); |
| 673 | + } |
| 674 | + } |
559 | 675 | } |
0 commit comments