-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Replace health request with a state observer. #88641
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
10718f9
4817492
3360044
b3b5638
56609fc
f0441c2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| pr: 88641 | ||
| summary: Replace health request with a state observer | ||
| area: Allocation | ||
| type: bug | ||
| issues: [] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,6 +31,7 @@ | |
| import java.util.stream.Collectors; | ||
| import java.util.stream.Stream; | ||
|
|
||
| import static org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING; | ||
| import static org.hamcrest.Matchers.equalTo; | ||
| import static org.hamcrest.Matchers.lessThan; | ||
| import static org.hamcrest.Matchers.lessThanOrEqualTo; | ||
|
|
@@ -70,9 +71,7 @@ public void testGetGlobalCheckpoints() throws Exception { | |
| ); | ||
| final GetGlobalCheckpointsAction.Response response = client().execute(GetGlobalCheckpointsAction.INSTANCE, request).get(); | ||
| long[] expected = new long[shards]; | ||
| for (int i = 0; i < shards; ++i) { | ||
| expected[i] = -1; | ||
| } | ||
| Arrays.fill(expected, -1); | ||
| assertArrayEquals(expected, response.globalCheckpoints()); | ||
|
|
||
| final int totalDocuments = shards * 3; | ||
|
|
@@ -149,7 +148,7 @@ public void testPollGlobalCheckpointAdvancement() throws Exception { | |
|
|
||
| } | ||
|
|
||
| public void testPollGlobalCheckpointAdvancementTimeout() throws Exception { | ||
| public void testPollGlobalCheckpointAdvancementTimeout() { | ||
| String indexName = "test_index"; | ||
| client().admin() | ||
| .indices() | ||
|
|
@@ -182,7 +181,7 @@ public void testPollGlobalCheckpointAdvancementTimeout() throws Exception { | |
| assertEquals(29L, response.globalCheckpoints()[0]); | ||
| } | ||
|
|
||
| public void testMustProvideCorrectNumberOfShards() throws Exception { | ||
| public void testMustProvideCorrectNumberOfShards() { | ||
| String indexName = "test_index"; | ||
| client().admin() | ||
| .indices() | ||
|
|
@@ -214,7 +213,7 @@ public void testMustProvideCorrectNumberOfShards() throws Exception { | |
| ); | ||
| } | ||
|
|
||
| public void testWaitForAdvanceOnlySupportsOneShard() throws Exception { | ||
| public void testWaitForAdvanceOnlySupportsOneShard() { | ||
| String indexName = "test_index"; | ||
| client().admin() | ||
| .indices() | ||
|
|
@@ -305,42 +304,63 @@ public void testWaitOnIndexCreated() throws Exception { | |
| assertFalse(response.timedOut()); | ||
| } | ||
|
|
||
| public void testPrimaryShardsNotReadyNoWait() throws Exception { | ||
| final GetGlobalCheckpointsAction.Request request = new GetGlobalCheckpointsAction.Request( | ||
| "not-assigned", | ||
| false, | ||
| false, | ||
| EMPTY_ARRAY, | ||
| TEN_SECONDS | ||
| ); | ||
| /** | ||
| * Cluster remains yellow when initial primary is THROTTLED (and unavailable) during creation. | ||
| * This test verifies that implementation can handle this scenario. | ||
| */ | ||
| public void testWaitOnIndexCreatedWithThrottling() { | ||
|
|
||
| client().admin() | ||
| .cluster() | ||
| .prepareUpdateSettings() | ||
| .setPersistentSettings( | ||
| Settings.builder().put(CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING.getKey(), 0).build() | ||
| ) | ||
| .get(); | ||
|
|
||
| client().admin() | ||
| .indices() | ||
| .prepareCreate("not-assigned") | ||
| .prepareCreate("throttled-during-creation") | ||
| .setWaitForActiveShards(ActiveShardCount.NONE) | ||
| .setSettings( | ||
| Settings.builder() | ||
| .put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), Translog.Durability.REQUEST) | ||
| .put("index.number_of_shards", 1) | ||
| .put("index.number_of_replicas", 0) | ||
| .put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "node", "none") | ||
| ) | ||
| .get(); | ||
|
|
||
| UnavailableShardsException exception = expectThrows( | ||
| UnavailableShardsException.class, | ||
| () -> client().execute(GetGlobalCheckpointsAction.INSTANCE, request).actionGet() | ||
| ); | ||
| assertEquals("Primary shards were not active [shards=1, active=0]", exception.getMessage()); | ||
| try { | ||
| TimeValue timeout = TimeValue.timeValueMillis(between(10, 100)); | ||
| UnavailableShardsException exception = expectThrows( | ||
| UnavailableShardsException.class, | ||
| () -> client().execute( | ||
| GetGlobalCheckpointsAction.INSTANCE, | ||
| new GetGlobalCheckpointsAction.Request("throttled-during-creation", true, true, EMPTY_ARRAY, timeout) | ||
| ).actionGet() | ||
| ); | ||
| assertEquals( | ||
| "Primary shards were not active within timeout [timeout=" + timeout + ", shards=1, active=0]", | ||
| exception.getMessage() | ||
| ); | ||
| } finally { | ||
| client().admin() | ||
| .cluster() | ||
| .prepareUpdateSettings() | ||
| .setPersistentSettings( | ||
| Settings.builder().putNull(CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING.getKey()).build() | ||
| ) | ||
| .get(); | ||
| } | ||
|
||
| } | ||
|
|
||
| public void testWaitOnPrimaryShardsReadyTimeout() throws Exception { | ||
| TimeValue timeout = TimeValue.timeValueMillis(between(1, 100)); | ||
| public void testPrimaryShardsNotReadyNoWait() { | ||
| final GetGlobalCheckpointsAction.Request request = new GetGlobalCheckpointsAction.Request( | ||
| "not-assigned", | ||
| true, | ||
| true, | ||
| false, | ||
| false, | ||
| EMPTY_ARRAY, | ||
| timeout | ||
| TEN_SECONDS | ||
| ); | ||
| client().admin() | ||
| .indices() | ||
|
|
@@ -359,21 +379,21 @@ public void testWaitOnPrimaryShardsReadyTimeout() throws Exception { | |
| UnavailableShardsException.class, | ||
| () -> client().execute(GetGlobalCheckpointsAction.INSTANCE, request).actionGet() | ||
| ); | ||
| assertEquals("Primary shards were not active within timeout [timeout=" + timeout + ", shards=1, active=0]", exception.getMessage()); | ||
| assertEquals("Primary shards were not active [shards=1, active=0]", exception.getMessage()); | ||
| } | ||
|
|
||
| public void testWaitOnPrimaryShardsReady() throws Exception { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we don't want to remove this test. It's still valid, isn't it? I was expecting us to strengthen this test to verify that creating the index concurrently with running the action still works. It turns out we already have that test, so we can leave this one as-is IMO. |
||
| String indexName = "not-assigned"; | ||
| public void testWaitOnPrimaryShardsReadyTimeout() { | ||
| TimeValue timeout = TimeValue.timeValueMillis(between(1, 100)); | ||
| final GetGlobalCheckpointsAction.Request request = new GetGlobalCheckpointsAction.Request( | ||
| indexName, | ||
| "not-assigned", | ||
| true, | ||
| true, | ||
| EMPTY_ARRAY, | ||
| TEN_SECONDS | ||
| timeout | ||
| ); | ||
| client().admin() | ||
| .indices() | ||
| .prepareCreate(indexName) | ||
| .prepareCreate("not-assigned") | ||
| .setWaitForActiveShards(ActiveShardCount.NONE) | ||
| .setSettings( | ||
| Settings.builder() | ||
|
|
@@ -384,20 +404,10 @@ public void testWaitOnPrimaryShardsReady() throws Exception { | |
| ) | ||
| .get(); | ||
|
|
||
| long start = System.nanoTime(); | ||
| ActionFuture<GetGlobalCheckpointsAction.Response> future = client().execute(GetGlobalCheckpointsAction.INSTANCE, request); | ||
| Thread.sleep(randomIntBetween(10, 100)); | ||
| client().admin() | ||
| .indices() | ||
| .prepareUpdateSettings(indexName) | ||
| .setSettings(Settings.builder().put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "node", "")) | ||
| .get(); | ||
| client().prepareIndex(indexName).setId(Integer.toString(0)).setSource("{}", XContentType.JSON).get(); | ||
|
|
||
| GetGlobalCheckpointsAction.Response response = future.actionGet(); | ||
| long elapsed = TimeValue.timeValueNanos(System.nanoTime() - start).seconds(); | ||
| assertThat(elapsed, lessThanOrEqualTo(TEN_SECONDS.seconds())); | ||
| assertThat(response.globalCheckpoints()[0], equalTo(0L)); | ||
| assertFalse(response.timedOut()); | ||
| UnavailableShardsException exception = expectThrows( | ||
| UnavailableShardsException.class, | ||
| () -> client().execute(GetGlobalCheckpointsAction.INSTANCE, request).actionGet() | ||
| ); | ||
| assertEquals("Primary shards were not active within timeout [timeout=" + timeout + ", shards=1, active=0]", exception.getMessage()); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, it seems like a bug that we even permit `0` here. I don't see a good reason to do this in production, and it would be pretty harmful to do this accidentally. OK for now, but if we fixed this bug we'd need to find some other way to delay allocation.