-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-27551 Add config options to delay assignment to retain last region location #4945
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 4 commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
87fc143
HBASE-27551 Add config options to delay assignment to retain last reg…
69db293
Added comments to the two new config properties
d1c9ecb
Addressing Duo's suggestions
20c454c
addressing latest suggestions by Duo
b5d21a0
Addressing latest review comments
wchevreuil File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| import static org.apache.hadoop.hbase.io.hfile.CacheConfig.DEFAULT_EVICT_ON_CLOSE; | ||
| import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_CLOSE_KEY; | ||
| import static org.apache.hadoop.hbase.master.LoadBalancer.BOGUS_SERVER_NAME; | ||
| import static org.apache.hadoop.hbase.master.assignment.AssignmentManager.FORCE_REGION_RETAINMENT; | ||
|
|
||
| import edu.umd.cs.findbugs.annotations.Nullable; | ||
| import java.io.IOException; | ||
|
|
@@ -31,6 +32,7 @@ | |
| import org.apache.hadoop.hbase.client.RetriesExhaustedException; | ||
| import org.apache.hadoop.hbase.master.MetricsAssignmentManager; | ||
| import org.apache.hadoop.hbase.master.RegionState.State; | ||
| import org.apache.hadoop.hbase.master.ServerManager; | ||
| import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; | ||
| import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; | ||
| import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; | ||
|
|
@@ -95,6 +97,10 @@ | |
| * Notice that, although we allow specify a target server, it just acts as a candidate, we do not | ||
| * guarantee that the region will finally be on the target server. If this is important for you, you | ||
| * should check whether the region is on the target server after the procedure is finished. | ||
| * </p> | ||
| * Alternatively, to try retaining assignments, the | ||
| * <b>hbase.master.scp.retain.assignment.force</b> option can be used together with | ||
| * <b>hbase.master.scp.retain.assignment</b>. | ||
| * <p/> | ||
| * When you want to schedule a TRSP, please check whether there is still one for this region, and | ||
| * the check should be under the RegionStateNode lock. We will remove the TRSP from a | ||
|
|
@@ -126,6 +132,8 @@ public class TransitRegionStateProcedure | |
|
|
||
| private boolean isSplit; | ||
|
|
||
| private long retries; | ||
|
|
||
| public TransitRegionStateProcedure() { | ||
| } | ||
|
|
||
|
|
@@ -188,6 +196,27 @@ protected boolean waitInitialized(MasterProcedureEnv env) { | |
| return am.waitMetaLoaded(this) || am.waitMetaAssigned(this, getRegion()); | ||
| } | ||
|
|
||
| private void checkAndWaitForOriginalServer(MasterProcedureEnv env, ServerName lastHost) | ||
| throws ProcedureSuspendedException { | ||
| ServerManager serverManager = env.getMasterServices().getServerManager(); | ||
| ServerName newNameForServer = serverManager.findServerWithSameHostnamePortWithLock(lastHost); | ||
| boolean isOnline = serverManager.createDestinationServersList().contains(newNameForServer); | ||
|
|
||
| if (!isOnline && retries < env.getAssignmentManager().getForceRegionRetainmentRetries()) { | ||
| retries++; | ||
| LOG.info("Suspending the TRSP PID={} because {} is true and previous host {} " | ||
| + "for region is not yet online.", this.getProcId(), FORCE_REGION_RETAINMENT, lastHost); | ||
| setTimeout(env.getAssignmentManager().getForceRegionRetainmentWait()); | ||
|
||
| setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); | ||
| throw new ProcedureSuspendedException(); | ||
| } | ||
| LOG.info( | ||
Apache9 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| "{} is true. TRSP PID={} waited {}ms for host {} to come back online. " | ||
| + "Did host come back online? {}", | ||
| FORCE_REGION_RETAINMENT, this.getProcId(), | ||
| (retries * env.getAssignmentManager().getForceRegionRetainmentWait()), lastHost, isOnline); | ||
| } | ||
|
|
||
| private void queueAssign(MasterProcedureEnv env, RegionStateNode regionNode) | ||
| throws ProcedureSuspendedException { | ||
| boolean retain = false; | ||
|
|
@@ -200,9 +229,18 @@ private void queueAssign(MasterProcedureEnv env, RegionStateNode regionNode) | |
| regionNode.setRegionLocation(assignCandidate); | ||
| } else if (regionNode.getLastHost() != null) { | ||
| retain = true; | ||
| LOG.info("Setting lastHost as the region location {}", regionNode.getLastHost()); | ||
| LOG.info("Setting lastHost {} as the location for region {}", regionNode.getLastHost(), | ||
| regionNode.getRegionInfo().getEncodedName()); | ||
| regionNode.setRegionLocation(regionNode.getLastHost()); | ||
| } | ||
| if ( | ||
| regionNode.getRegionLocation() != null | ||
| && env.getAssignmentManager().isForceRegionRetainment() | ||
| ) { | ||
| LOG.warn("{} is set to true. This may delay regions re-assignment " | ||
| + "upon RegionServers crashes or restarts.", FORCE_REGION_RETAINMENT); | ||
| checkAndWaitForOriginalServer(env, regionNode.getRegionLocation()); | ||
| } | ||
| } | ||
| LOG.info("Starting {}; {}; forceNewPlan={}, retain={}", this, regionNode.toShortString(), | ||
| forceNewPlan, retain); | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better call it wait interval?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ack