-
Notifications
You must be signed in to change notification settings - Fork 25.6k
ILM fix the init step to actually be retryable #52076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
63f377c
af73584
6e46ae3
26a18af
a180e61
27708dd
8523f21
b609366
3355825
d7f0828
04c444c
5695091
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,13 +47,17 @@ public ClusterState performAction(Index index, ClusterState clusterState) { | |
|
|
||
| IndexMetaData.Builder indexMetadataBuilder = IndexMetaData.builder(indexMetaData); | ||
| if (shouldParseIndexName(indexMetaData.getSettings())) { | ||
| long parsedOriginationDate = parseIndexNameAndExtractDate(index.getName()); | ||
| indexMetadataBuilder.settingsVersion(indexMetaData.getSettingsVersion() + 1) | ||
| .settings(Settings.builder() | ||
| .put(indexMetaData.getSettings()) | ||
| .put(LifecycleSettings.LIFECYCLE_ORIGINATION_DATE, parsedOriginationDate) | ||
| .build() | ||
| ); | ||
| try { | ||
| long parsedOriginationDate = parseIndexNameAndExtractDate(index.getName()); | ||
| indexMetadataBuilder.settingsVersion(indexMetaData.getSettingsVersion() + 1) | ||
| .settings(Settings.builder() | ||
| .put(indexMetaData.getSettings()) | ||
| .put(LifecycleSettings.LIFECYCLE_ORIGINATION_DATE, parsedOriginationDate) | ||
| .build() | ||
| ); | ||
| } catch (Exception e) { | ||
| throw new InitializePolicyException(e.getMessage(), e); | ||
|
||
| } | ||
| } | ||
|
|
||
| ClusterState.Builder newClusterStateBuilder = ClusterState.builder(clusterState); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License; | ||
| * you may not use this file except in compliance with the Elastic License. | ||
| */ | ||
| package org.elasticsearch.xpack.core.ilm; | ||
|
|
||
| import org.elasticsearch.ElasticsearchException; | ||
|
|
||
| /** | ||
| * Exception thrown when a problem is encountered while initialising an ILM policy for an index. | ||
| */ | ||
| public class InitializePolicyException extends ElasticsearchException { | ||
|
|
||
| public InitializePolicyException(String msg, Throwable cause, Object... args) { | ||
|
||
| super(msg, cause, args); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1120,26 +1120,26 @@ public void testRolloverStepRetriesUntilRolledOverIndexIsDeleted() throws Except | |
| // {@link org.elasticsearch.xpack.core.ilm.ErrorStep} in order to retry the failing step. As {@link #assertBusy} | ||
| // increases the wait time between calls exponentially, we might miss the window where the policy is on | ||
| // {@link WaitForRolloverReadyStep} and the move to `attempt-rollover` request will not be successful. | ||
| waitUntil(() -> { | ||
| assertThat(waitUntil(() -> { | ||
|
||
| try { | ||
| return client().performRequest(moveToStepRequest).getStatusLine().getStatusCode() == 200; | ||
| } catch (IOException e) { | ||
| return false; | ||
| } | ||
| }, 30, TimeUnit.SECONDS); | ||
| }, 30, TimeUnit.SECONDS), is(true)); | ||
|
|
||
| // Similar to above, using {@link #waitUntil} as we want to make sure the `attempt-rollover` step started failing and is being | ||
| // retried (which means ILM moves back and forth between the `attempt-rollover` step and the `error` step) | ||
| waitUntil(() -> { | ||
| assertThat("ILM did not start retrying the attempt-rollover step", waitUntil(() -> { | ||
|
||
| try { | ||
| Map<String, Object> explainIndexResponse = explainIndex(index); | ||
| String step = (String) explainIndexResponse.get("step"); | ||
| String failedStep = (String) explainIndexResponse.get("failed_step"); | ||
| Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD); | ||
| return step != null && step.equals("attempt-rollover") && retryCount != null && retryCount >= 1; | ||
| return failedStep != null && failedStep.equals("attempt-rollover") && retryCount != null && retryCount >= 1; | ||
| } catch (IOException e) { | ||
| return false; | ||
| } | ||
| }, 30, TimeUnit.SECONDS); | ||
| }, 30, TimeUnit.SECONDS), is(true)); | ||
|
|
||
| deleteIndex(rolledIndex); | ||
|
|
||
|
|
@@ -1181,16 +1181,17 @@ public void testUpdateRolloverLifecycleDateStepRetriesWhenRolloverInfoIsMissing( | |
| "}"); | ||
| client().performRequest(moveToStepRequest); | ||
|
|
||
| waitUntil(() -> { | ||
| assertThat("ILM did not start retrying the update-rollover-lifecycle-date step", waitUntil(() -> { | ||
| try { | ||
| Map<String, Object> explainIndexResponse = explainIndex(index); | ||
| String step = (String) explainIndexResponse.get("step"); | ||
| String failedStep = (String) explainIndexResponse.get("failed_step"); | ||
| Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD); | ||
| return step != null && step.equals(UpdateRolloverLifecycleDateStep.NAME) && retryCount != null && retryCount >= 1; | ||
| return failedStep != null && failedStep.equals(UpdateRolloverLifecycleDateStep.NAME) && retryCount != null | ||
| && retryCount >= 1; | ||
| } catch (IOException e) { | ||
| return false; | ||
| } | ||
| }); | ||
| }, 30, TimeUnit.SECONDS), is(true)); | ||
|
|
||
| index(client(), index, "1", "foo", "bar"); | ||
| Request refreshIndex = new Request("POST", "/" + index + "/_refresh"); | ||
|
|
@@ -1376,16 +1377,17 @@ public void testRetryableInitializationStep() throws Exception { | |
| assertOK(client().performRequest(startReq)); | ||
|
|
||
| // Wait until an error has occurred. | ||
| waitUntil(() -> { | ||
| assertThat("ILM did not start retrying the init step", waitUntil(() -> { | ||
| try { | ||
| Map<String, Object> explainIndexResponse = explainIndex(index); | ||
| String step = (String) explainIndexResponse.get("step"); | ||
| String failedStep = (String) explainIndexResponse.get("failed_step"); | ||
| Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD); | ||
| return step != null && step.equals(InitializePolicyContextStep.KEY.getAction()) && retryCount != null && retryCount >= 1; | ||
| return failedStep != null && failedStep.equals(InitializePolicyContextStep.KEY.getAction()) && retryCount != null | ||
| && retryCount >= 1; | ||
| } catch (IOException e) { | ||
| return false; | ||
| } | ||
| }, 30, TimeUnit.SECONDS); | ||
| }, 30, TimeUnit.SECONDS), is(true)); | ||
|
|
||
| // Turn origination date parsing back off | ||
| updateIndexSettings(index, Settings.builder() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we may want to move the
`try` to surround more of the function (for example, the `fromIndexMetadata(...)` call