diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 440f8c664662..16243640708b 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -840,6 +840,23 @@ private static HRegionInfo getHRegionInfo(final Result r, byte [] qualifier) {
       cell.getValueOffset(), cell.getValueLength());
   }
 
+  /**
+   * Fetches the parent region's catalog Result with getRegionResult and returns the daughter
+   * regions that getDaughterRegions reads from it.
+   * NOTE(review): presumably this costs a catalog-table round trip, unlike getDaughterRegions
+   * which works on a Result already in hand; confirm before calling it with a lock held.
+   * @param connection connection we're using
+   * @param parent region information of parent
+   * @return a pair of HRegionInfo or PairOfSameType(null, null) if the region is not a split
+   *   parent
+   * @throws IOException propagated from reading the parent's entry
+   */
+  public static PairOfSameType<HRegionInfo> getDaughterRegionsFromParent(
+    final Connection connection, HRegionInfo parent) throws IOException {
+    Result parentResult = getRegionResult(connection, parent.getRegionName());
+    return getDaughterRegions(parentResult);
+  }
+
   /**
    * Returns the daughter regions by reading the corresponding columns of the catalog table
    * Result.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 3a02bdbde135..e31868e414da 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -44,13 +44,17 @@
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableStateManager;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ConfigUtil;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.PairOfSameType;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
@@ -737,11 +741,14 @@ public void regionOffline(
   public List serverOffline(final ZooKeeperWatcher watcher, final ServerName sn) {
     // Offline all regions on this server not already in transition.
     List rits = new ArrayList();
-    Set regionsToClean = new HashSet();
+    Set<Pair<HRegionInfo, HRegionInfo>> regionsToClean =
+      new HashSet<Pair<HRegionInfo, HRegionInfo>>();
     // Offline regions outside the loop and synchronized block to avoid
     // ConcurrentModificationException and deadlock in case of meta anassigned,
     // but RegionState a blocked.
     Set regionsToOffline = new HashSet();
+    // Encoded name of a daughter region -> the parent region whose catalog entry lists it.
+    Map<String, HRegionInfo> daughter2Parent = new HashMap<>();
     synchronized (this) {
       Set assignedRegions = serverHoldings.get(sn);
       if (assignedRegions == null) {
@@ -758,8 +765,23 @@ public List serverOffline(final ZooKeeperWatcher watcher, final Ser
             // Delete the ZNode if exists
             ZKAssign.deleteNodeFailSilent(watcher, region);
             regionsToOffline.add(region);
+            // NOTE(review): this lookup takes a connection and presumably reads hbase:meta; it
+            // appears to run inside the synchronized (this) block opened above, which the
+            // comment on regionsToOffline tries to avoid. Confirm, and consider moving it out.
+            PairOfSameType<HRegionInfo> daughterRegions =
+              MetaTableAccessor.getDaughterRegionsFromParent(this.server.getConnection(), region);
+            if (daughterRegions != null) {
+              if (daughterRegions.getFirst() != null) {
+                daughter2Parent.put(daughterRegions.getFirst().getEncodedName(), region);
+              }
+              if (daughterRegions.getSecond() != null) {
+                daughter2Parent.put(daughterRegions.getSecond().getEncodedName(), region);
+              }
+            }
           } catch (KeeperException ke) {
             server.abort("Unexpected ZK exception deleting node " + region, ke);
+          } catch (IOException e) {
+            LOG.warn("get daughter from meta exception " + region, e);
           }
         }
       }
@@ -783,10 +805,20 @@ public List serverOffline(final ZooKeeperWatcher watcher, final Ser
             LOG.info("Found region in " + state +
               " to be reassigned by ServerCrashProcedure for " + sn);
             rits.add(hri);
-          } else if(state.isSplittingNew() || state.isMergingNew()) {
-            LOG.info("Offline/Cleanup region if no meta entry exists, hri: " + hri +
-                " state: " + state);
-            regionsToClean.add(state.getRegion());
+          } else if (state.isSplittingNew() || state.isMergingNew()) {
+            LOG.info(
+              "Offline/Cleanup region if no meta entry exists, hri: " + hri + " state: " + state);
+            // Pair the region with its split parent only when the parent's catalog entry is
+            // still marked split and offline; otherwise there is no parent entry to restore.
+            HRegionInfo splitParent = null;
+            HRegionInfo parent = daughter2Parent.get(hri.getEncodedName());
+            if (parent != null) {
+              HRegionInfo info = getHRIFromMeta(parent);
+              if (info != null && info.isSplit() && info.isOffline()) {
+                splitParent = info;
+              }
+            }
+            regionsToClean.add(Pair.newPair(state.getRegion(), splitParent));
           } else {
             LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
           }
@@ -803,6 +835,25 @@ public List serverOffline(final ZooKeeperWatcher watcher, final Ser
     return rits;
   }
 
+  /**
+   * Looks up the given region through MetaTableAccessor and returns the HRegionInfo found there.
+   * NOTE(review): the call site in serverOffline may sit inside synchronized (this); confirm
+   * that a blocking catalog read is acceptable there.
+   * @param parent region to look up
+   * @return the HRegionInfo extracted from the lookup result (callers null-check it), or null
+   *   if the lookup throws an IOException, which is logged and not rethrown
+   */
+  private HRegionInfo getHRIFromMeta(HRegionInfo parent) {
+    try {
+      Result result =
+        MetaTableAccessor.getRegionResult(this.server.getConnection(), parent.getRegionName());
+      return MetaTableAccessor.getHRegionInfo(result);
+    } catch (IOException e) {
+      LOG.error("got exception when query meta with region " + parent.getEncodedName(), e);
+      return null;
+    }
+  }
+
   /**
    * This method does an RPC to hbase:meta. Do not call this method with a lock/synchronize held.
    * In ZK mode we rollback and hence cleanup daughters/merged region. We also cleanup if
@@ -810,12 +861,15 @@ public List serverOffline(final ZooKeeperWatcher watcher, final Ser
    *
-   * @param hris The hris to check if empty in hbase:meta and if so, clean them up.
+   * @param hris Pairs whose first element is the hri to check if empty in hbase:meta and, if
+   *   so, clean up, and whose second element is its split parent to restore, or null.
    */
-  private void cleanFailedSplitMergeRegions(Set hris) {
+  private void cleanFailedSplitMergeRegions(Set<Pair<HRegionInfo, HRegionInfo>> hris) {
     if (hris.isEmpty()) {
       return;
     }
 
-    for (HRegionInfo hri : hris) {
+    for (Pair<HRegionInfo, HRegionInfo> hriPair : hris) {
+      HRegionInfo hri = hriPair.getFirst();
+      HRegionInfo parentInfo = hriPair.getSecond();
       // This is RPC to meta table. It is done while we have a synchronize on
       // regionstates. No progress will be made if meta is not available at this time.
       // This is a cleanup task. Not critical.
@@ -829,6 +883,16 @@ private void cleanFailedSplitMergeRegions(Set hris) {
           if (regionPair != null) {
             MetaTableAccessor.deleteRegion(this.server.getConnection(), hri);
           }
+          if (parentInfo != null) {
+            // Write the parent back to the catalog with its split and offline flags cleared.
+            HRegionInfo copyOfParent = new HRegionInfo(parentInfo);
+            copyOfParent.setOffline(false);
+            copyOfParent.setSplit(false);
+            Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
+            List<Mutation> mutations = new ArrayList<Mutation>();
+            mutations.add(putParent);
+            MetaTableAccessor.mutateMetaTable(this.server.getConnection(), mutations);
+          }
           LOG.debug("Cleaning up HDFS since no meta entry exists, hri: " + hri);
           FSUtils.deleteRegionDir(server.getConfiguration(), hri);
         }