GatewayAllocator: reset rerouting flag after error

bleskes · bleskes · commit 8dbe18b8efec · 2015-06-05T21:04:19.000+02:00
After asynchronously fetching shard information, the gateway allocator issues a reroute via a cluster state update task. elastic#11421 introduced an optimization that tries to avoid submitting unneeded reroutes when results for many shards come in together. This is done by having a rerouting flag, indicating that a reroute is already pending and thus any new incoming shard info doesn't need to issue another reroute. This flag wasn't reset upon an error in the reroute update task. Most notably, if a master node had to step down due to a min_master_node violation, it could reject an ongoing reroute. Failing to reset the flag caused the node to skip any future reroute once it became master again. Closes elastic#11519
diff --git a/src/main/java/org/elasticsearch/gateway/local/LocalGatewayAllocator.java b/src/main/java/org/elasticsearch/gateway/local/LocalGatewayAllocator.java
@@ -550,6 +550,7 @@ public ClusterState execute(ClusterState currentState) throws Exception {
 
                 @Override
                 public void onFailure(String source, Throwable t) {
+                    rerouting.set(false);
                     logger.warn("failed to perform reroute post async fetch for {}", t, source);
                 }
             });
diff --git a/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java b/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java
@@ -32,7 +32,6 @@
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
-import org.elasticsearch.test.junit.annotations.TestLogging;
 import org.junit.Test;
 
 import java.util.concurrent.ExecutionException;
@@ -166,8 +165,6 @@ public void run() {
     }
 
     @Test @LuceneTestCase.Slow
-    @TestLogging("cluster.routing.allocation.allocator:TRACE")
-    @LuceneTestCase.AwaitsFix(bugUrl = "boaz is looking into this")
     public void multipleNodesShutdownNonMasterNodes() throws Exception {
         Settings settings = settingsBuilder()
                 .put("discovery.type", "zen")

Original file line number	Diff line number	Diff line change
`@@ -550,6 +550,7 @@ public ClusterState execute(ClusterState currentState) throws Exception {`
`550`	`550`
`551`	`551`	`@Override`
`552`	`552`	`public void onFailure(String source, Throwable t) {`
	`553`	`+ rerouting.set(false);`
`553`	`554`	`logger.warn("failed to perform reroute post async fetch for {}", t, source);`
`554`	`555`	`}`
`555`	`556`	`});`