diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java index c77a736a4de9..996cd2650984 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java @@ -23,6 +23,7 @@ import java.util.concurrent.Callable; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.ipc.RpcControllerFactory; import org.apache.yetus.audience.InterfaceAudience; @@ -33,7 +34,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter; -import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetTableStateResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.HbckService.BlockingInterface; @@ -156,7 +156,7 @@ public MasterProtos.BypassProcedureResponse call() throws Exception { } @Override - public List scheduleServerCrashProcedure(List serverNames) + public List scheduleServerCrashProcedures(List serverNames) throws IOException { try { MasterProtos.ScheduleServerCrashProcedureResponse response = @@ -171,18 +171,4 @@ public List scheduleServerCrashProcedure(List serv throw new IOException(se); } } - - @Override - public Map - getFailedSplitMergeLegacyRegions(List tableNames) throws IOException { - try { - MasterProtos.GetFailedSplitMergeLegacyRegionsResponse response = - this.hbck.getFailedSplitMergeLegacyRegions(rpcControllerFactory.newController(), - RequestConverter.toGetFailedSplitMergeLegacyRegionsRequest(tableNames)); - return response.getErrorsMap(); - } catch (ServiceException se) { - 
LOG.debug("get failed split/merge legacy regions failed", se); - throw new IOException(se); - } - } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java index 249cd87a8435..1952e3636578 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java @@ -21,11 +21,15 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; + import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HBaseInterfaceAudience; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos; @@ -105,18 +109,16 @@ default List unassigns(List encodedRegionNames) throws IOException List bypassProcedure(List pids, long waitTime, boolean override, boolean recursive) throws IOException; - List scheduleServerCrashProcedure(List serverNames) - throws IOException; - /** - * This method is to get the regions which left by failed split/merge procedures for a certain - * table. There are two kinds of region this method will return. One is orphan regions left on FS, - * which left because split/merge procedure crashed before updating meta. And the other one is - * unassigned split daughter region or merged region, which left because split/merge procedure - * crashed before assignment. - * @param tableName table to check - * @return Map of problematic regions + * Use {@link #scheduleServerCrashProcedures(List)} instead. + * @deprecated since 2.2.1. Will be removed in 3.0.0.
*/ - Map - getFailedSplitMergeLegacyRegions(List tableName) throws IOException; + @Deprecated + default List scheduleServerCrashProcedure(List serverNames) + throws IOException { + return scheduleServerCrashProcedures( + serverNames.stream().map(ProtobufUtil::toServerName).collect(Collectors.toList())); + } + + List scheduleServerCrashProcedures(List serverNames) throws IOException; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java index 5515b2f1b69c..1bad6bd3e308 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java @@ -1887,19 +1887,11 @@ public static MasterProtos.UnassignsRequest toUnassignRegionsRequest( } public static MasterProtos.ScheduleServerCrashProcedureRequest - toScheduleServerCrashProcedureRequest(List serverNames) { - MasterProtos.ScheduleServerCrashProcedureRequest.Builder b = + toScheduleServerCrashProcedureRequest(List serverNames) { + MasterProtos.ScheduleServerCrashProcedureRequest.Builder builder = MasterProtos.ScheduleServerCrashProcedureRequest.newBuilder(); - return b.addAllServerName(serverNames).build(); - } - - public static MasterProtos.GetFailedSplitMergeLegacyRegionsRequest - toGetFailedSplitMergeLegacyRegionsRequest(List tableNames) { - MasterProtos.GetFailedSplitMergeLegacyRegionsRequest.Builder b = - MasterProtos.GetFailedSplitMergeLegacyRegionsRequest.newBuilder(); - List protoTableNames = tableNames.stream() - .map(tableName -> ProtobufUtil.toProtoTableName(tableName)).collect(Collectors.toList()); - return b.addAllTable(protoTableNames).build(); + serverNames.stream().map(ProtobufUtil::toServerName).forEach(sn -> builder.addServerName(sn)); + return builder.build(); } private static List toEncodedRegionNameRegionSpecifiers( diff --git 
a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto index 342a7827d7bf..35ae5ea0a261 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto @@ -1108,19 +1108,6 @@ message ScheduleServerCrashProcedureResponse { repeated uint64 pid = 1; } -message GetFailedSplitMergeLegacyRegionsRequest { - repeated TableName table = 1; -} - -enum RegionErrorType { - DAUGHTER_MERGED_REGION_NOT_ONLINE = 0; - ORPHAN_REGION_ON_FS = 1; -} - -message GetFailedSplitMergeLegacyRegionsResponse { - map errors = 1; -} - service HbckService { /** Update state of the table in meta only*/ rpc SetTableStateInMeta(SetTableStateInMetaRequest) @@ -1151,7 +1138,4 @@ service HbckService { /** Schedule a ServerCrashProcedure to help recover a crash server */ rpc ScheduleServerCrashProcedure(ScheduleServerCrashProcedureRequest) returns(ScheduleServerCrashProcedureResponse); - - rpc getFailedSplitMergeLegacyRegions(GetFailedSplitMergeLegacyRegionsRequest) - returns(GetFailedSplitMergeLegacyRegionsResponse); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 0f1352c31c8c..d3fb1ca8ef6e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -2528,163 +2528,6 @@ public MasterProtos.ScheduleServerCrashProcedureResponse scheduleServerCrashProc } } - @Override - public MasterProtos.GetFailedSplitMergeLegacyRegionsResponse getFailedSplitMergeLegacyRegions( - RpcController controller, MasterProtos.GetFailedSplitMergeLegacyRegionsRequest request) - throws ServiceException { - List tables = request.getTableList(); - - Map errorRegions = new HashMap<>(); - try { - for (HBaseProtos.TableName tableName : tables) { - 
errorRegions.putAll(getFailedSplitMergeLegacyRegions(ProtobufUtil.toTableName(tableName))); - } - } catch (IOException e) { - throw new ServiceException(e); - } - return MasterProtos.GetFailedSplitMergeLegacyRegionsResponse.newBuilder() - .putAllErrors(errorRegions).build(); - } - - private Map - getFailedSplitMergeLegacyRegions(TableName tableName) throws IOException { - if (!MetaTableAccessor.tableExists(master.getConnection(), tableName)) { - throw new IOException("table " + tableName.getNameAsString() + " doesn't exist"); - } - if (!MetaTableAccessor.getTableState(master.getConnection(), tableName).isEnabled()) { - throw new IOException( - "table " + tableName.getNameAsString() + " is not enabled yet"); - } - final Map problemRegions = new HashMap<>(); - - // Case 1. find orphan region on fs - // orphan regions may due to a failed split region procedure, which daughter regions are created - // then the procedure is aborted. Or merged region is created then the procedure is aborted. - List orphanRegions = findOrphanRegionOnFS(tableName); - orphanRegions.stream().forEach( - region -> problemRegions.put(region, MasterProtos.RegionErrorType.ORPHAN_REGION_ON_FS)); - - // Case 2. 
find unassigned daughter regions or merged regions - List unassignedDaughterOrMergedRegions = - findUnassignedDaughterOrMergedRegions(tableName); - unassignedDaughterOrMergedRegions.stream().forEach(region -> problemRegions.put(region, - MasterProtos.RegionErrorType.DAUGHTER_MERGED_REGION_NOT_ONLINE)); - - // if these regions in problemRegions are currently handled by SplitTableRegionProcedure or - // MergeTableRegionsProcedure, we should remove them from this map - master.getProcedures().stream().filter(p -> !(p.isFinished() || p.isBypass())).forEach(p -> { - if (p instanceof SplitTableRegionProcedure) { - problemRegions - .remove(((SplitTableRegionProcedure) p).getDaughterOneRI().getRegionNameAsString()); - problemRegions - .remove(((SplitTableRegionProcedure) p).getDaughterTwoRI().getRegionNameAsString()); - } else if (p instanceof MergeTableRegionsProcedure) { - problemRegions - .remove(((MergeTableRegionsProcedure) p).getMergedRegion().getRegionNameAsString()); - } - }); - - // check if regions are still problematic now - checkRegionStillProblematic(problemRegions, tableName); - return problemRegions; - } - - - private void checkRegionStillProblematic( - Map problemRegions, TableName tableName) - throws IOException { - Iterator> iterator = - problemRegions.entrySet().iterator(); - while (iterator.hasNext()) { - Map.Entry entry = iterator.next(); - Result r = MetaTableAccessor.getRegionResult(master.getConnection(), - Bytes.toBytesBinary(entry.getKey())); - switch (entry.getValue()) { - case ORPHAN_REGION_ON_FS: - // region is build for this directory, it is not a problematic region any more - if (r != null) { - iterator.remove(); - } - break; - case DAUGHTER_MERGED_REGION_NOT_ONLINE: - RegionState.State state = RegionStateStore.getRegionState(r, 0); - if (!state.matches(RegionState.State.CLOSED, RegionState.State.SPLITTING_NEW, - RegionState.State.MERGED)) { - iterator.remove(); - } - break; - default: - throw new IOException("there should be no problematic 
region of this type"); - } - } - } - - private List findUnassignedDaughterOrMergedRegions(TableName tableName) - throws IOException { - Set checkRegions = new HashSet<>(); - Map regionStates = new HashMap<>(); - Map regionInfos = new HashMap<>(); - - MetaTableAccessor.scanMeta(master.getConnection(), tableName, - MetaTableAccessor.QueryType.REGION, Integer.MAX_VALUE, r -> { - RegionInfo regionInfo = MetaTableAccessor.getRegionInfo(r); - regionInfos.put(regionInfo.getRegionNameAsString(), regionInfo); - RegionState.State state = RegionStateStore.getRegionState(r, 0); - regionStates.put(regionInfo.getEncodedName(), state); - if (regionInfo.isSplitParent()) { - PairOfSameType daughters = MetaTableAccessor.getDaughterRegions(r); - checkRegions.add(daughters.getFirst().getRegionNameAsString()); - checkRegions.add(daughters.getSecond().getRegionNameAsString()); - } else if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null - || r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEB_QUALIFIER) != null) { - checkRegions.add(regionInfo.getRegionNameAsString()); - } - return true; - }); - - // find unassigned merged or split daughter region - return checkRegions.stream().map(regionName -> regionInfos.get(regionName)) - .filter(regionInfo -> !regionInfo.isSplitParent()) - .filter(regionInfo -> !regionStates.get(regionInfo.getEncodedName()) - .matches(RegionState.State.OPEN)) - .map(regionInfo -> regionInfo.getRegionNameAsString()).collect(Collectors.toList()); - } - - private List findOrphanRegionOnFS(TableName tableName) throws IOException { - // get available regions from meta, merged region should be consider available - HashSet regionsInMeta = new HashSet<>(); - MetaTableAccessor.scanMeta(master.getConnection(), tableName, - MetaTableAccessor.QueryType.REGION, Integer.MAX_VALUE, r -> { - RegionInfo regionInfo = MetaTableAccessor.getRegionInfo(r); - regionsInMeta.add(regionInfo.getEncodedName()); - if (r.getValue(HConstants.CATALOG_FAMILY, 
HConstants.MERGEA_QUALIFIER) != null - || r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEB_QUALIFIER) != null) { - PairOfSameType mergedRegions = MetaTableAccessor.getMergeRegions(r); - regionsInMeta.add(mergedRegions.getFirst().getEncodedName()); - regionsInMeta.add(mergedRegions.getSecond().getEncodedName()); - } - return true; - }); - // get regionInfo from fs - Path tableDir = FSUtils.getTableDir(master.getMasterFileSystem().getRootDir(), tableName); - FileStatus[] regions = - master.getFileSystem().listStatus(tableDir, path -> !path.getName().startsWith(".")); - HashMap regionNames = new HashMap<>(); - for (FileStatus region : regions) { - RegionInfo regionInfo = - HRegionFileSystem.loadRegionInfoFileContent(master.getFileSystem(), region.getPath()); - regionNames.put(regionInfo.getEncodedName(), regionInfo.getRegionNameAsString()); - } - Iterator> regionIterator = regionNames.entrySet().iterator(); - while (regionIterator.hasNext()) { - Map.Entry region = regionIterator.next(); - if (regionsInMeta.contains(region.getKey())) { - regionIterator.remove(); - } - } - return new ArrayList<>(regionNames.values()); - } - @Override public SwitchRpcThrottleResponse switchRpcThrottle(RpcController controller, SwitchRpcThrottleRequest request) throws ServiceException { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java index ee277d97b9d3..62771428e780 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java @@ -18,16 +18,13 @@ package org.apache.hadoop.hbase.client; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.Arrays; import java.util.List; -import java.util.Map; import java.util.Optional; import 
java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import org.apache.hadoop.hbase.Coprocessor; import org.apache.hadoop.hbase.CoprocessorEnvironment; @@ -39,10 +36,7 @@ import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; import org.apache.hadoop.hbase.coprocessor.MasterObserver; import org.apache.hadoop.hbase.coprocessor.ObserverContext; -import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.RegionState; -import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure; -import org.apache.hadoop.hbase.master.assignment.SplitTableRegionProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface; import org.apache.hadoop.hbase.procedure2.Procedure; @@ -54,7 +48,6 @@ import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.util.Bytes; import org.junit.AfterClass; -import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -72,7 +65,6 @@ import org.apache.hbase.thirdparty.com.google.common.io.Closeables; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos; /** * Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down @@ -226,102 +218,6 @@ public void testAssigns() throws Exception { } } - @Test - public void testRecoverMergeAfterMetaUpdated() throws Exception { - String testTable = async ? 
"mergeTestAsync" : "mergeTestSync"; - TEST_UTIL.createMultiRegionTable(TableName.valueOf(testTable), Bytes.toBytes("family1"), 5); - TEST_UTIL.loadTable(TEST_UTIL.getConnection().getTable(TableName.valueOf(testTable)), - Bytes.toBytes("family1"), true); - HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); - Hbck hbck = getHbck(); - FailingMergeAfterMetaUpdatedMasterObserver observer = master.getMasterCoprocessorHost() - .findCoprocessor(FailingMergeAfterMetaUpdatedMasterObserver.class); - try (Admin admin = TEST_UTIL.getConnection().getAdmin()) { - List regions = admin.getRegions(TableName.valueOf(testTable)); - admin.mergeRegionsAsync(regions.get(0).getRegionName(), regions.get(1).getRegionName(), true); - assertNotNull(observer); - observer.latch.await(5000, TimeUnit.MILLISECONDS); - Map result = - hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable))); - Assert.assertEquals(0, result.size()); - Optional> procedure = TEST_UTIL.getHBaseCluster().getMaster().getProcedures() - .stream().filter(p -> p instanceof MergeTableRegionsProcedure).findAny(); - Assert.assertTrue(procedure.isPresent()); - hbck.bypassProcedure(Arrays.asList(procedure.get().getProcId()), 5, true, false); - result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable))); - Assert.assertEquals(1, result.size()); - hbck.assigns(Arrays.asList(result.keySet().toArray(new String[0])).stream() - .map(regionName -> regionName.split("\\.")[1]).collect(Collectors.toList())); - ProcedureTestingUtility.waitAllProcedures(master.getMasterProcedureExecutor()); - // now the state should be fixed - result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable))); - Assert.assertEquals(0, result.size()); - } catch (InterruptedException ie) { - throw new IOException(ie); - } finally { - observer.resetLatch(); - } - } - - @Test - public void testRecoverSplitAfterMetaUpdated() throws Exception { - String testTable = async ? 
"splitTestAsync" : "splitTestSync"; - TEST_UTIL.createMultiRegionTable(TableName.valueOf(testTable), Bytes.toBytes("family1"), 5); - HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); - Hbck hbck = getHbck(); - FailingSplitAfterMetaUpdatedMasterObserver observer = master.getMasterCoprocessorHost() - .findCoprocessor(FailingSplitAfterMetaUpdatedMasterObserver.class); - assertNotNull(observer); - try { - AsyncAdmin admin = TEST_UTIL.getAsyncConnection().getAdmin(); - byte[] splitKey = Bytes.toBytes("bcd"); - admin.split(TableName.valueOf(testTable), splitKey); - observer.latch.await(5000, TimeUnit.MILLISECONDS); - Map result = - hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable))); - // since there is a split procedure work on the region, thus this check should return a empty - // map. - Assert.assertEquals(0, result.size()); - Optional> procedure = TEST_UTIL.getHBaseCluster().getMaster().getProcedures() - .stream().filter(p -> p instanceof SplitTableRegionProcedure).findAny(); - Assert.assertTrue(procedure.isPresent()); - hbck.bypassProcedure(Arrays.asList(procedure.get().getProcId()), 5, true, false); - result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable))); - Assert.assertEquals(2, result.size()); - hbck.assigns(Arrays.asList(result.keySet().toArray(new String[0])).stream() - .map(regionName -> regionName.split("\\.")[1]).collect(Collectors.toList())); - ProcedureTestingUtility.waitAllProcedures(master.getMasterProcedureExecutor()); - // now the state should be fixed - result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable))); - Assert.assertEquals(0, result.size()); - - //split one of the daughter region again - observer.resetLatch(); - byte[] splitKey2 = Bytes.toBytes("bcde"); - - admin.split(TableName.valueOf(testTable), splitKey2); - observer.latch.await(5000, TimeUnit.MILLISECONDS); - - procedure = TEST_UTIL.getHBaseCluster().getMaster().getProcedures() 
- .stream().filter(p -> p instanceof SplitTableRegionProcedure).findAny(); - Assert.assertTrue(procedure.isPresent()); - hbck.bypassProcedure(Arrays.asList(procedure.get().getProcId()), 5, true, false); - result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable))); - Assert.assertEquals(2, result.size()); - hbck.assigns(Arrays.asList(result.keySet().toArray(new String[0])).stream() - .map(regionName -> regionName.split("\\.")[1]).collect(Collectors.toList())); - ProcedureTestingUtility.waitAllProcedures(master.getMasterProcedureExecutor()); - // now the state should be fixed - result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable))); - Assert.assertEquals(0, result.size()); - } catch (InterruptedException ie) { - throw new IOException(ie); - } finally { - observer.resetLatch(); - } - } - - @Test public void testScheduleSCP() throws Exception { HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);