-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Check for global blocks after IndexNotFoundException in TransportMasterNodeAction #78128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
f4bb014
3e79077
f0dabd5
1aec484
0c56fc2
f08112e
aaf0d74
f58c8c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,7 @@ | |
| import org.elasticsearch.common.io.stream.Writeable; | ||
| import org.elasticsearch.core.TimeValue; | ||
| import org.elasticsearch.discovery.MasterNotDiscoveredException; | ||
| import org.elasticsearch.index.IndexNotFoundException; | ||
| import org.elasticsearch.node.NodeClosedException; | ||
| import org.elasticsearch.tasks.CancellableTask; | ||
| import org.elasticsearch.tasks.Task; | ||
|
|
@@ -99,6 +100,10 @@ protected boolean localExecute(Request request) { | |
|
|
||
| protected abstract ClusterBlockException checkBlock(Request request, ClusterState state); | ||
|
|
||
| protected ClusterBlockException checkGlobalBlock(ClusterState clusterState) { | ||
| return null; | ||
| } | ||
|
|
||
| @Override | ||
| protected void doExecute(Task task, final Request request, ActionListener<Response> listener) { | ||
| ClusterState state = clusterService.state(); | ||
|
|
@@ -135,22 +140,7 @@ protected void doStart(ClusterState clusterState) { | |
| // check for block, if blocked, retry, else, execute locally | ||
| final ClusterBlockException blockException = checkBlock(request, clusterState); | ||
| if (blockException != null) { | ||
| if (blockException.retryable() == false) { | ||
| logger.trace("can't execute due to a non-retryable cluster block", blockException); | ||
| listener.onFailure(blockException); | ||
| } else { | ||
| logger.debug("can't execute due to a cluster block, retrying", blockException); | ||
| retry(clusterState, blockException, newState -> { | ||
| try { | ||
| ClusterBlockException newException = checkBlock(request, newState); | ||
| return (newException == null || newException.retryable() == false); | ||
| } catch (Exception e) { | ||
| // accept state as block will be rechecked by doStart() and listener.onFailure() then called | ||
| logger.debug("exception occurred during cluster block checking, accepting state", e); | ||
| return true; | ||
| } | ||
| }); | ||
| } | ||
| handleClusterBlockException(clusterState, blockException); | ||
| } else { | ||
| ActionListener<Response> delegate = listener.delegateResponse((delegatedListener, t) -> { | ||
| if (t instanceof FailedToCommitClusterStateException || t instanceof NotMasterException) { | ||
|
|
@@ -193,12 +183,41 @@ public void handleException(final TransportException exp) { | |
| }); | ||
| } | ||
| } | ||
| } catch (IndexNotFoundException e) { | ||
|
||
| // In some situations it's possible that this is a false exception, i.e. while there's a STATE_NOT_RECOVERED_BLOCK | ||
| // to ensure that this is a legitimate index not found exception we should check if there's a cluster exception and | ||
| // handle it if there's one. | ||
| ClusterBlockException clusterBlockException = checkGlobalBlock(clusterState); | ||
| if (clusterBlockException != null) { | ||
| handleClusterBlockException(clusterState, clusterBlockException); | ||
| } else { | ||
| listener.onFailure(e); | ||
| } | ||
| } catch (Exception e) { | ||
| logger.trace("top-level failure", e); | ||
| listener.onFailure(e); | ||
| } | ||
| } | ||
|
|
||
| private void handleClusterBlockException(ClusterState clusterState, ClusterBlockException blockException) { | ||
| if (blockException.retryable() == false) { | ||
| logger.trace("can't execute due to a non-retryable cluster block", blockException); | ||
| listener.onFailure(blockException); | ||
| } else { | ||
| logger.debug("can't execute due to a cluster block, retrying", blockException); | ||
| retry(clusterState, blockException, newState -> { | ||
| try { | ||
| ClusterBlockException newException = checkBlock(request, newState); | ||
|
||
| return (newException == null || newException.retryable() == false); | ||
| } catch (Exception e) { | ||
| // accept state as block will be rechecked by doStart() and listener.onFailure() then called | ||
| logger.debug("exception occurred during cluster block checking, accepting state", e); | ||
| return true; | ||
| } | ||
| }); | ||
| } | ||
| } | ||
|
|
||
| private void retryOnMasterChange(ClusterState state, Throwable failure) { | ||
| retry(state, failure, MasterNodeChangePredicate.build(state)); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thinking about this some more, I think we only really care about INFEs thrown while the
`STATE_NOT_RECOVERED_BLOCK` is in place; other blocks don't really matter. I think rather than introducing the `checkGlobalBlock` method (which kinda duplicates `checkBlock`) it'd be better to wrap `checkBlock` like this: