diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 1eea08ba8a20..adc7e7996c90 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -1620,6 +1620,17 @@ public enum OperationStatusCode { */ public static final int BATCH_ROWS_THRESHOLD_DEFAULT = 5000; + /** + * When ZooKeeper data does not exist on the master during meta bootstrap, remove the existing + * meta table directory by default, since it is considered a partially created meta. + * + * TODO: remove this feature once there is a way to identify a partial meta while bootstrapping + * a cluster that does not come with ZooKeeper data. + */ + public static final String REMOVE_META_ON_RESTART = "hbase.master.remove.meta.on.restart"; + public static final boolean DEFAULT_REMOVE_META_ON_RESTART = true; + + private HConstants() { // Can't be instantiated with this ctor. } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 0cf6de4ec34b..64530b341644 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1078,6 +1078,7 @@ private void finishActiveMasterInitialization(MonitoredTask status) // complete before we do this next step processing offline regions else it fails reading // table states messing up master launch (namespace table, etc., are not assigned). 
this.assignmentManager.processOfflineRegions();
+    this.assignmentManager.processRegionsOnUnknownServers();
     // Initialize after meta is up as below scans meta
     if (favoredNodesManager != null && !maintenanceMode) {
       SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 04529f0a48a5..c6b90c963dbc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -1441,6 +1441,37 @@ public void processOfflineRegions() {
     }
   }
 
+  /**
+   * Create assign procedures for non-offline regions of enabled tables that are assigned to
+   * {@code unknown} servers after hbase:meta is online.
+   * <p>
+   * This is a special case: the WAL directory, SCP WALs and ZK data have been cleared, and the
+   * cluster restarts with the hbase:meta table and other tables' storefiles. Regions that are
+   * offline, in transition, in a table that is not enabled, or without a recorded server are
+   * left untouched.
+   */
+  public void processRegionsOnUnknownServers() {
+    List<RegionInfo> regionsOnUnknownServers = regionStates.getRegionStates().stream()
+      .filter(s -> !s.isOffline())
+      .filter(s -> isTableEnabled(s.getRegion().getTable()))
+      .filter(s -> !regionStates.isRegionInTransition(s.getRegion()))
+      .filter(s -> {
+        ServerName serverName = regionStates.getRegionServerOfRegion(s.getRegion());
+        // A region without a recorded server is not considered to be on an unknown server.
+        return serverName != null
+          && master.getServerManager().isServerKnownAndOnline(serverName)
+            == ServerManager.ServerLiveState.UNKNOWN;
+      })
+      .map(RegionState::getRegion).collect(Collectors.toList());
+    if (regionsOnUnknownServers.isEmpty()) {
+      return;
+    }
+    LOG.info("Found regions {} on unknown servers, reassign them to online servers",
+      regionsOnUnknownServers);
+    master.getMasterProcedureExecutor()
+      .submitProcedures(createRoundRobinAssignProcedures(regionsOnUnknownServers));
+  }
+
   /* AM internal RegionStateStore.RegionStateVisitor implementation. To be used when
    * scanning META table for region rows, using RegionStateStore utility methods.
RegionStateStore
    * methods will convert Result into proper RegionInfo instances, but those would still need to be
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
index bbdaaa71c299..35432b8b3688 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
@@ -71,7 +71,11 @@ private static void writeFsLayout(Path rootDir, Configuration conf) throws IOExc
     LOG.info("BOOTSTRAP: creating hbase:meta region");
     FileSystem fs = rootDir.getFileSystem(conf);
     Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.META_TABLE_NAME);
-    if (fs.exists(tableDir) && !fs.delete(tableDir, true)) {
+    boolean removeMeta = conf.getBoolean(HConstants.REMOVE_META_ON_RESTART,
+      HConstants.DEFAULT_REMOVE_META_ON_RESTART);
+    // we use zookeeper data to tell if this is a partial created meta, if so we should delete
+    // and recreate the meta table.
+    if (removeMeta && fs.exists(tableDir) && !fs.delete(tableDir, true)) {
       LOG.warn("Can not delete partial created meta table, continue...");
     }
     // Bootstrapping, make sure blockcache is off. Else, one will be
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecreateCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecreateCluster.java
new file mode 100644
index 000000000000..d9cac08c1383
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecreateCluster.java
@@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.util.List;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
+import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil;
+
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+/**
+ * Tests that storefiles within the data directory are reused when the cluster fails over to a
+ * new set of region servers with different hostnames, with or without the previous WALs and
+ * ZooKeeper znodes. All HBase system tables and user tables should be assigned normally after
+ * the cluster restarts.
+ */
+@Category({ LargeTests.class })
+public class TestRecreateCluster {
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestRecreateCluster.class);
+
+  @Rule
+  public TestName name = new TestName();
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final int NUM_RS = 3;
+  private static final long TIMEOUT_MS = Duration.ofMinutes(2).toMillis();
+
+  @Before
+  public void setup() {
+    // Keep the existing hbase:meta directory on restart unless a test overrides this.
+    TEST_UTIL.getConfiguration().setBoolean(HConstants.REMOVE_META_ON_RESTART, false);
+  }
+
+  @Test
+  public void testRecreateCluster_UserTableDisabled() throws Exception {
+    TEST_UTIL.startMiniCluster(NUM_RS);
+    try {
+      TableName tableName = TableName.valueOf("t1");
+      prepareDataBeforeRecreate(TEST_UTIL, tableName);
+      TEST_UTIL.getAdmin().disableTable(tableName);
+      TEST_UTIL.waitTableDisabled(tableName.getName());
+      restartHBaseCluster(true);
+      TEST_UTIL.getAdmin().enableTable(tableName);
+      validateDataAfterRecreate(TEST_UTIL, tableName);
+    } finally {
+      TEST_UTIL.shutdownMiniCluster();
+    }
+  }
+
+  @Test
+  public void testRecreateCluster_UserTableEnabled() throws Exception {
+    validateRecreateClusterWithUserTableEnabled(true);
+  }
+
+  @Test
+  public void testRecreateCluster_UserTableEnabled_WithoutCleanupWALsAndZNodes() throws Exception {
+    // This scenario keeps WALs and znodes and runs with the default meta removal setting.
+    TEST_UTIL.getConfiguration().setBoolean(HConstants.REMOVE_META_ON_RESTART,
+      HConstants.DEFAULT_REMOVE_META_ON_RESTART);
+    validateRecreateClusterWithUserTableEnabled(false);
+  }
+
+  /**
+   * Starts a mini cluster, writes one row to an enabled user table, restarts HBase and checks
+   * that the row is still readable.
+   * @param cleanupWALsAndZNodes whether to delete WALs, master data and ZK data before restarting
+   */
+  private void validateRecreateClusterWithUserTableEnabled(boolean cleanupWALsAndZNodes)
+      throws Exception {
+    TEST_UTIL.startMiniCluster(NUM_RS);
+    try {
+      TableName tableName = TableName.valueOf("t1");
+      prepareDataBeforeRecreate(TEST_UTIL, tableName);
+      restartHBaseCluster(cleanupWALsAndZNodes);
+      validateDataAfterRecreate(TEST_UTIL, tableName);
+    } finally {
+      TEST_UTIL.shutdownMiniCluster();
+    }
+  }
+
+  /**
+   * Flushes all data, shuts HBase down, optionally wipes the master store, procedure WALs,
+   * region server WALs and ZK data, then restarts HBase and asserts that none of the previous
+   * region servers is online anymore.
+   * @param cleanUpWALsAndZNodes whether to delete WALs, master data and ZK data before restarting
+   */
+  private void restartHBaseCluster(boolean cleanUpWALsAndZNodes) throws Exception {
+    // flush cache so that everything is on disk
+    TEST_UTIL.getMiniHBaseCluster().flushcache();
+
+    List<ServerName> oldServers =
+        TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList();
+
+    // make sure there are no procedures pending, i.e. every known procedure has finished
+    TEST_UTIL.waitFor(TIMEOUT_MS, () -> TEST_UTIL.getHBaseCluster().getMaster()
+      .getProcedures().stream().allMatch(p -> p.isFinished()));
+
+    // shutdown and delete data if needed
+    Path walRootDirPath = TEST_UTIL.getMiniHBaseCluster().getMaster().getWALRootDir();
+    Path rootDirPath = CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration());
+    TEST_UTIL.shutdownMiniHBaseCluster();
+
+    if (cleanUpWALsAndZNodes) {
+      TEST_UTIL.getDFSCluster().getFileSystem()
+        .delete(new Path(rootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true);
+      TEST_UTIL.getDFSCluster().getFileSystem()
+        .delete(new Path(walRootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true);
+      TEST_UTIL.getDFSCluster().getFileSystem()
+        .delete(new Path(walRootDirPath, WALProcedureStore.MASTER_PROCEDURE_LOGDIR), true);
+
+      TEST_UTIL.getDFSCluster().getFileSystem()
+        .delete(new Path(walRootDirPath, HConstants.HREGION_LOGDIR_NAME), true);
+      TEST_UTIL.getDFSCluster().getFileSystem()
+        .delete(new Path(walRootDirPath, HConstants.HREGION_OLDLOGDIR_NAME), true);
+      // delete all zk data
+      // we cannot keep ZK data because it will hold the meta region states as open and
+      // didn't submit a InitMetaProcedure
+      ZKUtil.deleteChildrenRecursively(TEST_UTIL.getZooKeeperWatcher(),
+        TEST_UTIL.getZooKeeperWatcher().getZNodePaths().baseZNode);
+      TEST_UTIL.shutdownMiniZKCluster();
+      TEST_UTIL.startMiniZKCluster();
+    }
+
+    TEST_UTIL.restartHBaseCluster(NUM_RS);
+    TEST_UTIL.waitFor(TIMEOUT_MS,
+      () -> TEST_UTIL.getMiniHBaseCluster().getNumLiveRegionServers() == NUM_RS);
+
+    // make sure we have a new set of region servers with different hostnames and ports
+    List<ServerName> newServers =
+      TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList();
+    assertFalse("A region server from before the restart is still online",
+      newServers.stream().anyMatch(oldServers::contains));
+  }
+
+  /**
+   * Creates {@code tableName} with family "f", writes a single cell (r1, f:c, v) and makes sure
+   * the table's region is not hosted together with the namespace table's region.
+   */
+  private void prepareDataBeforeRecreate(
+      HBaseTestingUtility testUtil, TableName tableName) throws Exception {
+    try (Table table = testUtil.createTable(tableName, "f")) {
+      Put put = new Put(Bytes.toBytes("r1"));
+      put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"), Bytes.toBytes("v"));
+      table.put(put);
+    }
+
+    ensureTableNotColocatedWithSystemTable(tableName, TableName.NAMESPACE_TABLE_NAME);
+  }
+
+  /**
+   * Moves the single region of {@code userTable} to another region server when it is hosted by
+   * the same region server as the single region of {@code systemTable}.
+   */
+  private void ensureTableNotColocatedWithSystemTable(TableName userTable, TableName systemTable)
+    throws IOException, InterruptedException {
+    MiniHBaseCluster hbaseCluster = TEST_UTIL.getHBaseCluster();
+    assertTrue("Please start more than 1 regionserver",
+      hbaseCluster.getRegionServerThreads().size() > 1);
+
+    int userTableServerNum = getServerNumForTableWithOnlyOneRegion(userTable);
+    int systemTableServerNum = getServerNumForTableWithOnlyOneRegion(systemTable);
+
+    if (userTableServerNum != systemTableServerNum) {
+      // no-ops if user table and system are already on a different host
+      return;
+    }
+
+    int destServerNum = (systemTableServerNum + 1) % NUM_RS;
+    assertNotEquals(systemTableServerNum, destServerNum);
+
+    HRegionServer systemTableServer = hbaseCluster.getRegionServer(systemTableServerNum);
+    HRegionServer destServer = hbaseCluster.getRegionServer(destServerNum);
+    assertNotEquals(systemTableServer, destServer);
+    // make sure the dest server is live before moving region
+    hbaseCluster.waitForRegionServerToStart(destServer.getServerName().getHostname(),
+      destServer.getServerName().getPort(), TIMEOUT_MS);
+    // move region of userTable to a different regionserver not co-located with system table
+    TEST_UTIL.moveRegionAndWait(TEST_UTIL.getAdmin().getRegions(userTable).get(0),
+      destServer.getServerName());
+  }
+
+  /**
+   * Returns the server number that the mini cluster reports for the region server hosting the
+   * only region of {@code tableName}; asserts that the table has exactly one region.
+   */
+  private int getServerNumForTableWithOnlyOneRegion(TableName tableName) throws IOException {
+    List<RegionInfo> tableRegionInfos = TEST_UTIL.getAdmin().getRegions(tableName);
+    assertEquals(1, tableRegionInfos.size());
+    return TEST_UTIL.getHBaseCluster()
+      .getServerWith(tableRegionInfos.get(0).getRegionName());
+  }
+
+  /**
+   * Reads row r1 of {@code tableName} and asserts that it holds exactly the cell f:c = "v".
+   */
+  private void validateDataAfterRecreate(
+      HBaseTestingUtility testUtil, TableName tableName) throws Exception {
+    try (Table t1 = testUtil.getConnection().getTable(tableName)) {
+      Get get = new Get(Bytes.toBytes("r1"));
+      get.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"));
+      Result result = t1.get(get);
+      assertTrue(result.advance());
+      Cell cell = result.current();
+      assertEquals("v", Bytes.toString(cell.getValueArray(),
+        cell.getValueOffset(), cell.getValueLength()));
+      assertFalse(result.advance());
+    }
+  }
+
+}