-
Notifications
You must be signed in to change notification settings - Fork 9.2k
YARN-11323. [Federation] Improve ResourceManager Handler FinishApps. #4954
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
733333e
4978a70
adfc5ad
f98c892
d03b5fd
b8c6a3c
570bc57
2869e1c
512cd4f
7b287bc
a42bc43
5198d2e
8bd99a9
fec1fa4
945f1e3
8c78f40
bf052fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3727,6 +3727,26 @@ | |
| <value>yarnfederation/</value> | ||
| </property> | ||
|
|
||
| <property> | ||
| <description> | ||
| The number of times to retry cleaning up an app in the FederationStateStore. | ||
| The default value is 1, that is, if the app cleanup fails, the cleanup is retried once more. | ||
| </description> | ||
| <name>yarn.federation.state-store.clean-up-retry-count</name> | ||
| <value>1</value> | ||
| </property> | ||
|
|
||
| <property> | ||
| <description> | ||
| The sleep time between retries when cleaning up an app in the FederationStateStore. | ||
| When the app cleanup fails, | ||
| the operation sleeps for this period of time and then tries the cleanup again. | ||
| The default value is 1000ms. | ||
| </description> | ||
| <name>yarn.federation.state-store.clean-up-retry-sleep-time</name> | ||
| <value>1000ms</value> | ||
|
||
| </property> | ||
|
|
||
| <!-- Other Configuration --> | ||
|
|
||
| <property> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| /** | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with this | ||
| * work for additional information regarding copyright ownership. The ASF | ||
| * licenses this file to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
| * License for the specific language governing permissions and limitations under | ||
| * the License. | ||
| */ | ||
|
|
||
| package org.apache.hadoop.yarn.server.federation.retry; | ||
|
|
||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| public abstract class FederationActionRetry<T> { | ||
|
|
||
| public static final Logger LOG = | ||
|
||
| LoggerFactory.getLogger(FederationActionRetry.class); | ||
|
|
||
| protected abstract T run() throws Exception; | ||
|
|
||
| public T runWithRetries(int retryCount, long retrySleepTime) throws Exception { | ||
| int retry = 0; | ||
| while (true) { | ||
| try { | ||
| return run(); | ||
| } catch (Exception e) { | ||
| LOG.info("Exception while executing an Federation operation.", e); | ||
| if (++retry > retryCount) { | ||
| LOG.info("Maxed out Federation retries. Giving up!"); | ||
| throw e; | ||
| } | ||
| LOG.info("Retrying operation on Federation. Retry no. {}", retry); | ||
| Thread.sleep(retrySleepTime); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| /** Federation Retry Policies. **/ | ||
| package org.apache.hadoop.yarn.server.federation.retry; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TimeUnit.SECONDS.toMillis(1);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you very much for your help reviewing the code, I will fix it.