diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRegistry.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRegistry.java index 96329dcfb878..9537777db1ad 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRegistry.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRegistry.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,15 +19,12 @@ import java.io.Closeable; import java.util.concurrent.CompletableFuture; - import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.ServerName; import org.apache.yetus.audience.InterfaceAudience; /** * Implementations hold cluster information such as this cluster's id, location of hbase:meta, etc.. - * All stuffs that may be related to zookeeper at client side are placed here. - *

* Internal use only. */ @InterfaceAudience.Private @@ -45,21 +42,11 @@ interface AsyncRegistry extends Closeable { */ CompletableFuture getClusterId(); - /** - * Get the number of 'running' regionservers. - */ - CompletableFuture getCurrentNrHRS(); - /** * Get the address of HMaster. */ CompletableFuture getMasterAddress(); - /** - * Get the info port of HMaster. - */ - CompletableFuture getMasterInfoPort(); - /** * Closes this instance and releases any system resources associated with it */ diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZKAsyncRegistry.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZKAsyncRegistry.java index 36fa6bba7544..08e3846e39d2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZKAsyncRegistry.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZKAsyncRegistry.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -24,7 +24,6 @@ import static org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil.lengthOfPBMagic; import static org.apache.hadoop.hbase.util.FutureUtils.addListener; import static org.apache.hadoop.hbase.zookeeper.ZKMetadata.removeMetaData; - import java.io.IOException; import java.util.List; import java.util.concurrent.CompletableFuture; @@ -43,14 +42,12 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; - import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos; /** - * Fetch the registry data from zookeeper. + * Zookeeper based registry implementation. */ @InterfaceAudience.Private class ZKAsyncRegistry implements AsyncRegistry { @@ -210,11 +207,6 @@ public CompletableFuture getMetaRegionLocation() { return future; } - @Override - public CompletableFuture getCurrentNrHRS() { - return zk.exists(znodePaths.rsZNode).thenApply(s -> s != null ? s.getNumChildren() : 0); - } - private static ZooKeeperProtos.Master getMasterProto(byte[] data) throws IOException { if (data == null || data.length == 0) { return null; @@ -237,12 +229,6 @@ public CompletableFuture getMasterAddress() { }); } - @Override - public CompletableFuture getMasterInfoPort() { - return getAndConvert(znodePaths.masterAddressZNode, ZKAsyncRegistry::getMasterProto) - .thenApply(proto -> proto != null ? proto.getInfoPort() : 0); - } - @Override public void close() { zk.close(); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java index c6e48d420148..d117382ca121 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -80,6 +80,7 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.client.RegionLoadStats; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.RegionStatesCount; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; @@ -93,6 +94,7 @@ import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.io.TimeRange; +import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.protobuf.ProtobufMagic; import org.apache.hadoop.hbase.protobuf.ProtobufMessageConverter; import org.apache.hadoop.hbase.quotas.QuotaScope; @@ -3059,6 +3061,44 @@ public static ProcedureDescription buildProcedureDescription(String signature, S return builder.build(); } + /** + * Get the Meta region state from the passed data bytes. Can handle both old and new style + * server names. + * @param data protobuf serialized data with meta server name. + * @param replicaId replica ID for this region + * @return RegionState instance corresponding to the serialized data. + * @throws DeserializationException if the data is invalid. + */ + public static RegionState parseMetaRegionStateFrom(final byte[] data, int replicaId) + throws DeserializationException { + RegionState.State state = RegionState.State.OPEN; + ServerName serverName; + if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) { + try { + int prefixLen = ProtobufUtil.lengthOfPBMagic(); + ZooKeeperProtos.MetaRegionServer rl = + ZooKeeperProtos.MetaRegionServer.parser().parseFrom(data, prefixLen, + data.length - prefixLen); + if (rl.hasState()) { + state = RegionState.State.convert(rl.getState()); + } + HBaseProtos.ServerName sn = rl.getServer(); + serverName = ServerName.valueOf( + sn.getHostName(), sn.getPort(), sn.getStartCode()); + } catch (InvalidProtocolBufferException e) { + throw new DeserializationException("Unable to parse meta region location"); + } + } else { + // old style of meta region location? + serverName = parseServerNameFrom(data); + } + if (serverName == null) { + state = RegionState.State.OFFLINE; + } + return new RegionState(RegionReplicaUtil.getRegionInfoForReplica( + RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId), state, serverName); + } + /** * Get a ServerName from the passed in data bytes. * @param data Data with a serialize server name in it; can handle the old style diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java index c8511d4dd617..d923ebf78772 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -40,7 +40,8 @@ public class ZNodePaths { // TODO: Replace this with ZooKeeper constant when ZOOKEEPER-277 is resolved. public static final char ZNODE_PATH_SEPARATOR = '/'; - public final static String META_ZNODE_PREFIX = "meta-region-server"; + public static final String META_ZNODE_PREFIX_CONF_KEY = "zookeeper.znode.metaserver"; + public static final String META_ZNODE_PREFIX = "meta-region-server"; private static final String DEFAULT_SNAPSHOT_CLEANUP_ZNODE = "snapshot-cleanup"; // base znode for this cluster @@ -94,7 +95,7 @@ public class ZNodePaths { public ZNodePaths(Configuration conf) { baseZNode = conf.get(ZOOKEEPER_ZNODE_PARENT, DEFAULT_ZOOKEEPER_ZNODE_PARENT); ImmutableMap.Builder builder = ImmutableMap.builder(); - metaZNodePrefix = conf.get("zookeeper.znode.metaserver", META_ZNODE_PREFIX); + metaZNodePrefix = conf.get(META_ZNODE_PREFIX_CONF_KEY, META_ZNODE_PREFIX); String defaultMetaReplicaZNode = ZNodePaths.joinZNode(baseZNode, metaZNodePrefix); builder.put(DEFAULT_REPLICA_ID, defaultMetaReplicaZNode); int numMetaReplicas = conf.getInt(META_REPLICAS_NUM, DEFAULT_META_REPLICA_NUM); @@ -178,6 +179,18 @@ public String getZNodeForReplica(int replicaId) { .orElseGet(() -> metaReplicaZNodes.get(DEFAULT_REPLICA_ID) + "-" + replicaId); } + /** + * Parses the meta replicaId from the passed path. + * @param path the name of the full path which includes baseZNode. + * @return replicaId + */ + public int getMetaReplicaIdFromPath(String path) { + // Extract the znode from path. The prefix is of the following format. + // baseZNode + PATH_SEPARATOR. + int prefixLen = baseZNode.length() + 1; + return getMetaReplicaIdFromZnode(path.substring(prefixLen)); + } + /** * Parse the meta replicaId from the passed znode * @param znode the name of the znode, does not include baseZNode diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/DoNothingAsyncRegistry.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/DoNothingAsyncRegistry.java index 66330687a26a..8c7b07384c72 100644 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/DoNothingAsyncRegistry.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/DoNothingAsyncRegistry.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,7 +18,6 @@ package org.apache.hadoop.hbase.client; import java.util.concurrent.CompletableFuture; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.ServerName; @@ -43,21 +42,11 @@ public CompletableFuture getClusterId() { return CompletableFuture.completedFuture(null); } - @Override - public CompletableFuture getCurrentNrHRS() { - return CompletableFuture.completedFuture(0); - } - @Override public CompletableFuture getMasterAddress() { return CompletableFuture.completedFuture(null); } - @Override - public CompletableFuture getMasterInfoPort() { - return CompletableFuture.completedFuture(0); - } - @Override public void close() { } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java index 50798ed60a0a..99cab625b85c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -17,25 +17,24 @@ * limitations under the License. */ package org.apache.hadoop.hbase.master; - import java.io.IOException; +import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; - -import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; -import org.apache.hadoop.hbase.zookeeper.ZKUtil; -import org.apache.hadoop.hbase.zookeeper.ZKWatcher; -import org.apache.hadoop.hbase.zookeeper.ZNodePaths; -import org.apache.yetus.audience.InterfaceAudience; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ZNodeClearer; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.monitoring.MonitoredTask; -import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; import org.apache.hadoop.hbase.zookeeper.ZKListener; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZKWatcher; +import org.apache.hadoop.hbase.zookeeper.ZNodePaths; +import org.apache.yetus.audience.InterfaceAudience; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; /** * Handles everything on master-side related to master election. @@ -57,12 +56,18 @@ public class ActiveMasterManager extends ZKListener { final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false); final AtomicBoolean clusterShutDown = new AtomicBoolean(false); + // This server's information. private final ServerName sn; private int infoPort; private final Server master; + // Active master's server name. Invalidated anytime active master changes (based on ZK + // notifications) and lazily fetched on-demand. + // ServerName is immutable, so we don't need heavy synchronization around it. + private volatile ServerName activeMasterServerName; + /** - * @param watcher + * @param watcher ZK watcher * @param sn ServerName * @param master In an instance of a Master. */ @@ -106,6 +111,30 @@ void handle(final String path) { } } + /** + * Fetches the active master's ServerName from zookeeper. + */ + private void fetchAndSetActiveMasterServerName() { + LOG.debug("Attempting to fetch active master sn from zk"); + try { + activeMasterServerName = MasterAddressTracker.getMasterAddress(watcher); + } catch (IOException | KeeperException e) { + // Log and ignore for now and re-fetch later if needed. + LOG.error("Error fetching active master information", e); + } + } + + public Optional getActiveMasterServerName() { + if (!clusterHasActiveMaster.get()) { + return Optional.empty(); + } + if (activeMasterServerName == null) { + fetchAndSetActiveMasterServerName(); + } + // It could still be null, but return whatever we have. + return Optional.ofNullable(activeMasterServerName); + } + /** * Handle a change in the master node. Doesn't matter whether this was called * from a nodeCreated or nodeDeleted event because there are no guarantees @@ -134,6 +163,9 @@ private void handleMasterNodeChange() { // Notify any thread waiting to become the active master clusterHasActiveMaster.notifyAll(); } + // Reset the active master sn. Will be re-fetched later if needed. + // We don't want to make a synchronous RPC under a monitor. + activeMasterServerName = null; } } catch (KeeperException ke) { master.abort("Received an unexpected KeeperException, aborting", ke); @@ -151,8 +183,8 @@ private void handleMasterNodeChange() { * @param checkInterval the interval to check if the master is stopped * @param startupStatus the monitor status to track the progress * @return True if no issue becoming active master else false if another - * master was running or if some other problem (zookeeper, stop flag has been - * set on this Master) + * master was running or if some other problem (zookeeper, stop flag has been + * set on this Master) */ boolean blockUntilBecomingActiveMaster( int checkInterval, MonitoredTask startupStatus) { @@ -178,10 +210,14 @@ boolean blockUntilBecomingActiveMaster( // We are the master, return startupStatus.setStatus("Successfully registered as active master."); this.clusterHasActiveMaster.set(true); + activeMasterServerName = sn; LOG.info("Registered as active master=" + this.sn); return true; } + // Invalidate the active master name so that subsequent requests do not get any stale + // master information. Will be re-fetched if needed. + activeMasterServerName = null; // There is another active master running elsewhere or this is a restart // and the master ephemeral node has not expired yet. this.clusterHasActiveMaster.set(true); @@ -208,7 +244,8 @@ boolean blockUntilBecomingActiveMaster( ZKUtil.deleteNode(this.watcher, this.watcher.getZNodePaths().masterAddressZNode); // We may have failed to delete the znode at the previous step, but - // we delete the file anyway: a second attempt to delete the znode is likely to fail again. + // we delete the file anyway: a second attempt to delete the znode is likely to fail + // again. ZNodeClearer.deleteMyEphemeralNodeOnDisk(); } else { msg = "Another master is the active master, " + currentMaster + diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index cc226bd58c3d..55d1a1f34c07 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -355,6 +355,12 @@ public void run() { // manager of assignment nodes in zookeeper private AssignmentManager assignmentManager; + /** + * Cache for the meta region replica's locations. Also tracks their changes to avoid stale + * cache entries. + */ + private final MetaRegionLocationCache metaRegionLocationCache; + // manager of replication private ReplicationPeerManager replicationPeerManager; @@ -508,8 +514,7 @@ public void doGet(HttpServletRequest request, * #finishActiveMasterInitialization(MonitoredTask) after * the master becomes the active one. */ - public HMaster(final Configuration conf) - throws IOException, KeeperException { + public HMaster(final Configuration conf) throws IOException { super(conf); TraceUtil.initTracer(conf); try { @@ -522,7 +527,6 @@ public HMaster(final Configuration conf) } else { maintenanceMode = false; } - this.rsFatals = new MemoryBoundedLogMessageBuffer( conf.getLong("hbase.master.buffer.for.rs.fatals", 1 * 1024 * 1024)); LOG.info("hbase.rootdir={}, hbase.cluster.distributed={}", getDataRootDir(), @@ -570,8 +574,10 @@ public HMaster(final Configuration conf) // Some unit tests don't need a cluster, so no zookeeper at all if (!conf.getBoolean("hbase.testing.nocluster", false)) { + this.metaRegionLocationCache = new MetaRegionLocationCache(this.zooKeeper); this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this); } else { + this.metaRegionLocationCache = null; this.activeMasterManager = null; } cachedClusterId = new CachedClusterId(conf); @@ -3869,6 +3875,10 @@ public String getClusterId() { return cachedClusterId.getFromCacheOrFetch(); } + public Optional getActiveMaster() { + return activeMasterManager.getActiveMasterServerName(); + } + @Override public void runReplicationBarrierCleaner() { ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner; @@ -3876,4 +3886,8 @@ public void runReplicationBarrierCleaner() { rbc.chore(); } } + + public MetaRegionLocationCache getMetaRegionLocationCache() { + return this.metaRegionLocationCache; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaRegionLocationCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaRegionLocationCache.java new file mode 100644 index 000000000000..f4e91b56051d --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaRegionLocationCache.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ConcurrentNavigableMap; +import java.util.concurrent.ThreadFactory; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.exceptions.DeserializationException; +import org.apache.hadoop.hbase.types.CopyOnWriteArrayMap; +import org.apache.hadoop.hbase.util.RetryCounter; +import org.apache.hadoop.hbase.util.RetryCounterFactory; +import org.apache.hadoop.hbase.zookeeper.ZKListener; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZKWatcher; +import org.apache.hadoop.hbase.zookeeper.ZNodePaths; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; + +/** + * A cache of meta region location metadata. Registers a listener on ZK to track changes to the + * meta table znodes. Clients are expected to retry if the meta information is stale. This class + * is thread-safe (a single instance of this class can be shared by multiple threads without race + * conditions). + */ +@InterfaceAudience.Private +public class MetaRegionLocationCache extends ZKListener { + + private static final Logger LOG = LoggerFactory.getLogger(MetaRegionLocationCache.class); + + /** + * Maximum number of times we retry when ZK operation times out. + */ + private static final int MAX_ZK_META_FETCH_RETRIES = 10; + /** + * Sleep interval ms between ZK operation retries. + */ + private static final int SLEEP_INTERVAL_MS_BETWEEN_RETRIES = 1000; + private static final int SLEEP_INTERVAL_MS_MAX = 10000; + private final RetryCounterFactory retryCounterFactory = + new RetryCounterFactory(MAX_ZK_META_FETCH_RETRIES, SLEEP_INTERVAL_MS_BETWEEN_RETRIES); + + /** + * Cached meta region locations indexed by replica ID. + * CopyOnWriteArrayMap ensures synchronization during updates and a consistent snapshot during + * client requests. Even though CopyOnWriteArrayMap copies the data structure for every write, + * that should be OK since the size of the list is often small and mutations are not too often + * and we do not need to block client requests while mutations are in progress. + */ + private final CopyOnWriteArrayMap cachedMetaLocations; + + private enum ZNodeOpType { + INIT, + CREATED, + CHANGED, + DELETED + } + + public MetaRegionLocationCache(ZKWatcher zkWatcher) { + super(zkWatcher); + cachedMetaLocations = new CopyOnWriteArrayMap<>(); + watcher.registerListener(this); + // Populate the initial snapshot of data from meta znodes. + // This is needed because stand-by masters can potentially start after the initial znode + // creation. It blocks forever until the initial meta locations are loaded from ZK and watchers + // are established. Subsequent updates are handled by the registered listener. Also, this runs + // in a separate thread in the background to not block master init. + ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).build(); + RetryCounterFactory retryFactory = new RetryCounterFactory( + Integer.MAX_VALUE, SLEEP_INTERVAL_MS_BETWEEN_RETRIES, SLEEP_INTERVAL_MS_MAX); + threadFactory.newThread( + ()->loadMetaLocationsFromZk(retryFactory.create(), ZNodeOpType.INIT)).start(); + } + + /** + * Populates the current snapshot of meta locations from ZK. If no meta znodes exist, it registers + * a watcher on base znode to check for any CREATE/DELETE events on the children. + * @param retryCounter controls the number of retries and sleep between retries. + */ + private void loadMetaLocationsFromZk(RetryCounter retryCounter, ZNodeOpType opType) { + List znodes = null; + while (retryCounter.shouldRetry()) { + try { + znodes = watcher.getMetaReplicaNodesAndWatchChildren(); + break; + } catch (KeeperException ke) { + LOG.debug("Error populating initial meta locations", ke); + if (!retryCounter.shouldRetry()) { + // Retries exhausted and watchers not set. This is not a desirable state since the cache + // could remain stale forever. Propagate the exception. + watcher.abort("Error populating meta locations", ke); + return; + } + try { + retryCounter.sleepUntilNextRetry(); + } catch (InterruptedException ie) { + LOG.error("Interrupted while loading meta locations from ZK", ie); + Thread.currentThread().interrupt(); + return; + } + } + } + if (znodes == null || znodes.isEmpty()) { + // No meta znodes exist at this point but we registered a watcher on the base znode to listen + // for updates. They will be handled via nodeChildrenChanged(). + return; + } + if (znodes.size() == cachedMetaLocations.size()) { + // No new meta znodes got added. + return; + } + for (String znode: znodes) { + String path = ZNodePaths.joinZNode(watcher.getZNodePaths().baseZNode, znode); + updateMetaLocation(path, opType); + } + } + + /** + * Gets the HRegionLocation for a given meta replica ID. Renews the watch on the znode for + * future updates. + * @param replicaId ReplicaID of the region. + * @return HRegionLocation for the meta replica. + * @throws KeeperException if there is any issue fetching/parsing the serialized data. + */ + private HRegionLocation getMetaRegionLocation(int replicaId) + throws KeeperException { + RegionState metaRegionState; + try { + byte[] data = ZKUtil.getDataAndWatch(watcher, + watcher.getZNodePaths().getZNodeForReplica(replicaId)); + metaRegionState = ProtobufUtil.parseMetaRegionStateFrom(data, replicaId); + } catch (DeserializationException e) { + throw ZKUtil.convert(e); + } + return new HRegionLocation(metaRegionState.getRegion(), metaRegionState.getServerName()); + } + + private void updateMetaLocation(String path, ZNodeOpType opType) { + if (!isValidMetaZNode(path)) { + return; + } + LOG.debug("Updating meta znode for path {}: {}", path, opType.name()); + int replicaId = watcher.getZNodePaths().getMetaReplicaIdFromPath(path); + RetryCounter retryCounter = retryCounterFactory.create(); + HRegionLocation location = null; + while (retryCounter.shouldRetry()) { + try { + if (opType == ZNodeOpType.DELETED) { + if (!ZKUtil.watchAndCheckExists(watcher, path)) { + // The path does not exist, we've set the watcher and we can break for now. + break; + } + // If it is a transient error and the node appears right away, we fetch the + // latest meta state. + } + location = getMetaRegionLocation(replicaId); + break; + } catch (KeeperException e) { + LOG.debug("Error getting meta location for path {}", path, e); + if (!retryCounter.shouldRetry()) { + LOG.warn("Error getting meta location for path {}. Retries exhausted.", path, e); + break; + } + try { + retryCounter.sleepUntilNextRetry(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + return; + } + } + } + if (location == null) { + cachedMetaLocations.remove(replicaId); + return; + } + cachedMetaLocations.put(replicaId, location); + } + + /** + * @return Optional list of HRegionLocations for meta replica(s), null if the cache is empty. + * + */ + public Optional> getMetaRegionLocations() { + ConcurrentNavigableMap snapshot = + cachedMetaLocations.tailMap(cachedMetaLocations.firstKey()); + if (snapshot.isEmpty()) { + // This could be possible if the master has not successfully initialized yet or meta region + // is stuck in some weird state. + return Optional.empty(); + } + List result = new ArrayList<>(); + // Explicitly iterate instead of new ArrayList<>(snapshot.values()) because the underlying + // ArrayValueCollection does not implement toArray(). + snapshot.values().forEach(location -> result.add(location)); + return Optional.of(result); + } + + /** + * Helper to check if the given 'path' corresponds to a meta znode. This listener is only + * interested in changes to meta znodes. + */ + private boolean isValidMetaZNode(String path) { + return watcher.getZNodePaths().isAnyMetaReplicaZNode(path); + } + + @Override + public void nodeCreated(String path) { + updateMetaLocation(path, ZNodeOpType.CREATED); + } + + @Override + public void nodeDeleted(String path) { + updateMetaLocation(path, ZNodeOpType.DELETED); + } + + @Override + public void nodeDataChanged(String path) { + updateMetaLocation(path, ZNodeOpType.CHANGED); + } + + @Override + public void nodeChildrenChanged(String path) { + if (!path.equals(watcher.getZNodePaths().baseZNode)) { + return; + } + loadMetaLocationsFromZk(retryCounterFactory.create(), ZNodeOpType.CHANGED); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/DummyAsyncRegistry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/DummyAsyncRegistry.java index e9ae25d2eaf0..245876e7eefe 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/DummyAsyncRegistry.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/DummyAsyncRegistry.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -39,21 +39,11 @@ public CompletableFuture getClusterId() { return null; } - @Override - public CompletableFuture getCurrentNrHRS() { - return null; - } - @Override public CompletableFuture getMasterAddress() { return null; } - @Override - public CompletableFuture getMasterInfoPort() { - return null; - } - @Override public void close() { } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaRegionLocationCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaRegionLocationCache.java new file mode 100644 index 000000000000..02236a67f44a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaRegionLocationCache.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.MultithreadedTestUtil; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.MetaRegionLocationCache; +import org.apache.hadoop.hbase.master.RegionState; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZKWatcher; +import org.apache.hadoop.hbase.zookeeper.ZNodePaths; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({SmallTests.class, MasterTests.class }) +public class TestMetaRegionLocationCache { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestMetaRegionLocationCache.class); + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static AsyncRegistry REGISTRY; + + @BeforeClass + public static void setUp() throws Exception { + TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, 3); + TEST_UTIL.startMiniCluster(3); + REGISTRY = AsyncRegistryFactory.getRegistry(TEST_UTIL.getConfiguration()); + RegionReplicaTestHelper.waitUntilAllMetaReplicasHavingRegionLocation( + TEST_UTIL.getConfiguration(), REGISTRY, 3); + TEST_UTIL.getAdmin().balancerSwitch(false, true); + } + + @AfterClass + public static void cleanUp() throws Exception { + IOUtils.closeQuietly(REGISTRY); + TEST_UTIL.shutdownMiniCluster(); + } + + private List getCurrentMetaLocations(ZKWatcher zk) throws Exception { + List result = new ArrayList<>(); + for (String znode: zk.getMetaReplicaNodes()) { + String path = ZNodePaths.joinZNode(zk.getZNodePaths().baseZNode, znode); + int replicaId = zk.getZNodePaths().getMetaReplicaIdFromPath(path); + RegionState state = MetaTableLocator.getMetaRegionState(zk, replicaId); + result.add(new HRegionLocation(state.getRegion(), state.getServerName())); + } + return result; + } + + // Verifies that the cached meta locations in the given master are in sync with what is in ZK. + private void verifyCachedMetaLocations(HMaster master) throws Exception { + // Wait until initial meta locations are loaded. + int retries = 0; + while (!master.getMetaRegionLocationCache().getMetaRegionLocations().isPresent()) { + Thread.sleep(1000); + if (++retries == 10) { + break; + } + } + List metaHRLs = + master.getMetaRegionLocationCache().getMetaRegionLocations().get(); + assertFalse(metaHRLs.isEmpty()); + ZKWatcher zk = master.getZooKeeper(); + List metaZnodes = zk.getMetaReplicaNodes(); + assertEquals(metaZnodes.size(), metaHRLs.size()); + List actualHRLs = getCurrentMetaLocations(zk); + Collections.sort(metaHRLs); + Collections.sort(actualHRLs); + assertEquals(actualHRLs, metaHRLs); + } + + @Test public void testInitialMetaLocations() throws Exception { + verifyCachedMetaLocations(TEST_UTIL.getMiniHBaseCluster().getMaster()); + } + + @Test public void testStandByMetaLocations() throws Exception { + HMaster standBy = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster(); + verifyCachedMetaLocations(standBy); + } + + /* + * Shuffles the meta region replicas around the cluster and makes sure the cache is not stale. + */ + @Test public void testMetaLocationsChange() throws Exception { + List currentMetaLocs = + getCurrentMetaLocations(TEST_UTIL.getMiniHBaseCluster().getMaster().getZooKeeper()); + // Move these replicas to random servers. + for (HRegionLocation location: currentMetaLocs) { + RegionReplicaTestHelper.moveRegion(TEST_UTIL, location); + } + RegionReplicaTestHelper.waitUntilAllMetaReplicasHavingRegionLocation( + TEST_UTIL.getConfiguration(), REGISTRY, 3); + for (JVMClusterUtil.MasterThread masterThread: + TEST_UTIL.getMiniHBaseCluster().getMasterThreads()) { + verifyCachedMetaLocations(masterThread.getMaster()); + } + } + + /** + * Tests MetaRegionLocationCache's init procedure to make sure that it correctly watches the base + * znode for notifications. + */ + @Test public void testMetaRegionLocationCache() throws Exception { + final String parentZnodeName = "/randomznodename"; + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parentZnodeName); + ServerName sn = ServerName.valueOf("localhost", 1234, 5678); + try (ZKWatcher zkWatcher = new ZKWatcher(conf, null, null, true)) { + // A thread that repeatedly creates and drops an unrelated child znode. This is to simulate + // some ZK activity in the background. + MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(conf); + ctx.addThread(new MultithreadedTestUtil.RepeatingTestThread(ctx) { + @Override public void doAnAction() throws Exception { + final String testZnode = parentZnodeName + "/child"; + ZKUtil.createNodeIfNotExistsAndWatch(zkWatcher, testZnode, testZnode.getBytes()); + ZKUtil.deleteNode(zkWatcher, testZnode); + } + }); + ctx.startThreads(); + try { + MetaRegionLocationCache metaCache = new MetaRegionLocationCache(zkWatcher); + // meta znodes do not exist at this point, cache should be empty. + assertFalse(metaCache.getMetaRegionLocations().isPresent()); + // Set the meta locations for a random meta replicas, simulating an active hmaster meta + // assignment. + for (int i = 0; i < 3; i++) { + // Updates the meta znodes. + MetaTableLocator.setMetaLocation(zkWatcher, sn, i, RegionState.State.OPEN); + } + // Wait until the meta cache is populated. + int iters = 0; + while (iters++ < 10) { + if (metaCache.getMetaRegionLocations().isPresent() + && metaCache.getMetaRegionLocations().get().size() == 3) { + break; + } + Thread.sleep(1000); + } + List metaLocations = metaCache.getMetaRegionLocations().get(); + assertEquals(3, metaLocations.size()); + for (HRegionLocation location : metaLocations) { + assertEquals(sn, location.getServerName()); + } + } finally { + // clean up. + ctx.stop(); + ZKUtil.deleteChildrenRecursively(zkWatcher, parentZnodeName); + } + } + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java index 5a72daea2030..3e4ca94eca98 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -25,7 +25,6 @@ import static org.junit.Assert.assertNotSame; import static org.junit.Assert.assertThat; import static org.junit.Assert.fail; - import java.io.IOException; import java.util.concurrent.ExecutionException; import java.util.stream.IntStream; @@ -84,11 +83,8 @@ public void test() throws InterruptedException, ExecutionException, IOException String expectedClusterId = TEST_UTIL.getHBaseCluster().getMaster().getClusterId(); assertEquals("Expected " + expectedClusterId + ", found=" + clusterId, expectedClusterId, clusterId); - assertEquals(TEST_UTIL.getHBaseCluster().getClusterMetrics().getLiveServerMetrics().size(), - REGISTRY.getCurrentNrHRS().get().intValue()); assertEquals(TEST_UTIL.getHBaseCluster().getMaster().getServerName(), REGISTRY.getMasterAddress().get()); - assertEquals(-1, REGISTRY.getMasterInfoPort().get().intValue()); RegionReplicaTestHelper .waitUntilAllMetaReplicasHavingRegionLocation(TEST_UTIL.getConfiguration(), REGISTRY, 3); RegionLocations locs = REGISTRY.getMetaRegionLocation().get(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java index 4649eea5ce06..84837f93965a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.master; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -91,6 +92,7 @@ public static void tearDownAfterClass() throws Exception { ActiveMasterManager activeMasterManager = dummyMaster.getActiveMasterManager(); assertFalse(activeMasterManager.clusterHasActiveMaster.get()); + assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); // First test becoming the active master uninterrupted MonitoredTask status = Mockito.mock(MonitoredTask.class); @@ -99,6 +101,7 @@ public static void tearDownAfterClass() throws Exception { activeMasterManager.blockUntilBecomingActiveMaster(100, status); assertTrue(activeMasterManager.clusterHasActiveMaster.get()); assertMaster(zk, master); + assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); // Now pretend master restart DummyMaster secondDummyMaster = new DummyMaster(zk,master); @@ -108,6 +111,8 @@ public static void tearDownAfterClass() throws Exception { activeMasterManager.blockUntilBecomingActiveMaster(100, status); assertTrue(activeMasterManager.clusterHasActiveMaster.get()); assertMaster(zk, master); + assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); + assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get()); } /** @@ -135,6 +140,7 @@ public void testActiveMasterManagerFromZK() throws Exception { ActiveMasterManager activeMasterManager = ms1.getActiveMasterManager(); assertFalse(activeMasterManager.clusterHasActiveMaster.get()); + assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); // First test becoming the active master uninterrupted ClusterStatusTracker clusterStatusTracker = @@ -144,6 +150,7 @@ public void testActiveMasterManagerFromZK() throws Exception { Mockito.mock(MonitoredTask.class)); assertTrue(activeMasterManager.clusterHasActiveMaster.get()); assertMaster(zk, firstMasterAddress); + assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); // New manager will now try to become the active master in another thread WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress); @@ -161,6 +168,8 @@ public void testActiveMasterManagerFromZK() throws Exception { assertTrue(t.manager.clusterHasActiveMaster.get()); // But secondary one should not be the active master assertFalse(t.isActiveMaster); + // Verify the active master ServerName is populated in standby master. + assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get()); // Close the first server and delete it's master node ms1.stop("stopping first server"); @@ -189,6 +198,7 @@ public void testActiveMasterManagerFromZK() throws Exception { assertTrue(t.manager.clusterHasActiveMaster.get()); assertTrue(t.isActiveMaster); + assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get()); LOG.info("Deleting master node"); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCloseAnOpeningRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCloseAnOpeningRegion.java index ba4d53510faa..492222464f99 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCloseAnOpeningRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCloseAnOpeningRegion.java @@ -35,7 +35,6 @@ import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -65,7 +64,7 @@ public class TestCloseAnOpeningRegion { public static final class MockHMaster extends HMaster { - public MockHMaster(Configuration conf) throws IOException, KeeperException { + public MockHMaster(Configuration conf) throws IOException { super(conf); } @@ -141,4 +140,4 @@ public void test() throws IOException, InterruptedException { table.put(new Put(Bytes.toBytes(0)).addColumn(CF, Bytes.toBytes("cq"), Bytes.toBytes(0))); } } -} \ No newline at end of file +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java index a6844fcac091..84722df676dc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java @@ -42,7 +42,6 @@ import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; -import org.apache.zookeeper.KeeperException; import org.junit.ClassRule; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -147,7 +146,7 @@ private void setupTable() throws Exception { public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java index 22554d355b9a..d29e061d07fd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java @@ -28,7 +28,6 @@ import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; -import org.apache.zookeeper.KeeperException; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -120,7 +119,7 @@ public void testChoreSchedule() throws Exception { // Make it public so that JVMClusterUtil can access it. public static class TestHMaster extends HMaster { - public TestHMaster(Configuration conf) throws IOException, KeeperException { + public TestHMaster(Configuration conf) throws IOException { super(conf); } } @@ -144,4 +143,4 @@ public boolean isStopped() { } -} \ No newline at end of file +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java index d3a85209ccac..a42a4046d54f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -56,7 +55,7 @@ public class TestShutdownBackupMaster { public static final class MockHMaster extends HMaster { - public MockHMaster(Configuration conf) throws IOException, KeeperException { + public MockHMaster(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureBackoff.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureBackoff.java index ca0384e893eb..4112da713c14 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureBackoff.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureBackoff.java @@ -35,7 +35,6 @@ import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -71,7 +70,7 @@ void persistToMeta(RegionStateNode regionNode) throws IOException { public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureHang.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureHang.java index 0463721656b4..a25368f4e420 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureHang.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureHang.java @@ -105,7 +105,7 @@ public ReportRegionStateTransitionResponse reportRegionStateTransition( public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionAssignedToMultipleRegionServers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionAssignedToMultipleRegionServers.java index 0d8202b69f26..44af256b92af 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionAssignedToMultipleRegionServers.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionAssignedToMultipleRegionServers.java @@ -41,7 +41,6 @@ import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -110,7 +109,7 @@ public ReportRegionStateTransitionResponse reportRegionStateTransition( public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java index 371897bdbcdd..d07dfef1ae32 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java @@ -44,7 +44,6 @@ import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.IdLock; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -106,7 +105,7 @@ public ReportRegionStateTransitionResponse reportRegionStateTransition( public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java index 6c9e5eb34ba6..1de806fe0ffb 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java @@ -44,7 +44,6 @@ import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -117,7 +116,7 @@ public ReportRegionStateTransitionResponse reportRegionStateTransition( public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionRetry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionRetry.java index 6c191c9be8a1..71c4693cfa60 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionRetry.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionRetry.java @@ -40,7 +40,6 @@ import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -84,7 +83,7 @@ public ReportRegionStateTransitionResponse reportRegionStateTransition( public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java index cbbdbdc8d157..d676af929392 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java @@ -43,7 +43,6 @@ import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.IdLock; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -130,7 +129,7 @@ public List getRegionsOnServer(ServerName serverName) { public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestWakeUpUnexpectedProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestWakeUpUnexpectedProcedure.java index 47c70a156997..62e31615def4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestWakeUpUnexpectedProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestWakeUpUnexpectedProcedure.java @@ -44,7 +44,6 @@ import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -198,7 +197,7 @@ public List createDestinationServersList() { public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java index f46bb418ea15..4dff86d6ce1e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java @@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; -import org.apache.zookeeper.KeeperException; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Test; @@ -53,7 +52,7 @@ public class TestRegisterPeerWorkerWhenRestarting extends SyncReplicationTestBas public static final class HMasterForTest extends HMaster { - public HMasterForTest(Configuration conf) throws IOException, KeeperException { + public HMasterForTest(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/protobuf/TestProtobufUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/protobuf/TestProtobufUtil.java index ff29df838b5d..69e656fbe52c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/protobuf/TestProtobufUtil.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/protobuf/TestProtobufUtil.java @@ -19,7 +19,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; - import com.google.protobuf.ByteString; import java.io.IOException; import java.nio.ByteBuffer; @@ -29,13 +28,17 @@ import org.apache.hadoop.hbase.CellComparatorImpl; import org.apache.hadoop.hbase.ExtendedCellBuilderFactory; import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.Append; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Increment; import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.io.TimeRange; +import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.protobuf.generated.CellProtos; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Column; @@ -51,11 +54,12 @@ import org.junit.ClassRule; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.MetaRegionServer; /** * Class to test ProtobufUtil. */ -@Category({MiscTests.class, SmallTests.class}) +@Category({ MiscTests.class, SmallTests.class}) public class TestProtobufUtil { @ClassRule @@ -348,4 +352,32 @@ public void testToCell() throws Exception { ProtobufUtil.toCell(ExtendedCellBuilderFactory.create(CellBuilderType.SHALLOW_COPY), cell); assertTrue(CellComparatorImpl.COMPARATOR.compare(offheapKV, newOffheapKV) == 0); } + + @Test + public void testMetaRegionState() throws Exception { + ServerName serverName = ServerName.valueOf("localhost", 1234, 5678); + // New region state style. + for (RegionState.State state: RegionState.State.values()) { + RegionState regionState = + new RegionState(RegionInfoBuilder.FIRST_META_REGIONINFO, state, serverName); + MetaRegionServer metars = MetaRegionServer.newBuilder() + .setServer(org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil.toServerName(serverName)) + .setRpcVersion(HConstants.RPC_CURRENT_VERSION) + .setState(state.convert()).build(); + // Serialize + byte[] data = ProtobufUtil.prependPBMagic(metars.toByteArray()); + ProtobufUtil.prependPBMagic(data); + // Deserialize + RegionState regionStateNew = + org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil.parseMetaRegionStateFrom(data, 1); + assertEquals(regionState.getServerName(), regionStateNew.getServerName()); + assertEquals(regionState.getState(), regionStateNew.getState()); + } + // old style. + RegionState rs = + org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil.parseMetaRegionStateFrom( + serverName.getVersionedBytes(), 1); + assertEquals(serverName, rs.getServerName()); + assertEquals(rs.getState(), RegionState.State.OPEN); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java index aaf2d2eb9a04..f61a77ec3937 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java @@ -122,7 +122,7 @@ public void stopCapturing() { * This test HMaster class will always throw ServerNotRunningYetException if checked. */ public static class NeverInitializedMaster extends HMaster { - public NeverInitializedMaster(Configuration conf) throws IOException, KeeperException { + public NeverInitializedMaster(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationProcedureRetry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationProcedureRetry.java index a2ae0b434249..e9fcc6634664 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationProcedureRetry.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationProcedureRetry.java @@ -36,7 +36,6 @@ import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.ReplicationTests; -import org.apache.zookeeper.KeeperException; import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -134,7 +133,7 @@ public static final class MockHMaster extends HMaster { private ReplicationPeerManager manager; - public MockHMaster(Configuration conf) throws IOException, KeeperException { + public MockHMaster(Configuration conf) throws IOException { super(conf); } diff --git a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java index 0cebc762fd73..d28d8f9df9af 100644 --- a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java +++ b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java @@ -34,12 +34,7 @@ import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; - import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; -import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; -import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.MetaRegionServer; /** @@ -274,42 +269,17 @@ public static RegionState getMetaRegionState(ZKWatcher zkw) throws KeeperExcepti * @throws KeeperException if a ZooKeeper operation fails */ public static RegionState getMetaRegionState(ZKWatcher zkw, int replicaId) - throws KeeperException { - RegionState.State state = RegionState.State.OPEN; - ServerName serverName = null; + throws KeeperException { + RegionState regionState = null; try { byte[] data = ZKUtil.getData(zkw, zkw.getZNodePaths().getZNodeForReplica(replicaId)); - if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) { - try { - int prefixLen = ProtobufUtil.lengthOfPBMagic(); - ZooKeeperProtos.MetaRegionServer rl = - ZooKeeperProtos.MetaRegionServer.parser().parseFrom(data, prefixLen, - data.length - prefixLen); - if (rl.hasState()) { - state = RegionState.State.convert(rl.getState()); - } - HBaseProtos.ServerName sn = rl.getServer(); - serverName = ServerName.valueOf( - sn.getHostName(), sn.getPort(), sn.getStartCode()); - } catch (InvalidProtocolBufferException e) { - throw new DeserializationException("Unable to parse meta region location"); - } - } else { - // old style of meta region location? - serverName = ProtobufUtil.parseServerNameFrom(data); - } + regionState = ProtobufUtil.parseMetaRegionStateFrom(data, replicaId); } catch (DeserializationException e) { throw ZKUtil.convert(e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } - if (serverName == null) { - state = RegionState.State.OFFLINE; - } - return new RegionState( - RegionReplicaUtil.getRegionInfoForReplica( - RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId), - state, serverName); + return regionState; } /** diff --git a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKWatcher.java b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKWatcher.java index e5ef089a87c4..0b9fc93a0e0c 100644 --- a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKWatcher.java +++ b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKWatcher.java @@ -23,10 +23,8 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.CountDownLatch; import java.util.regex.Matcher; import java.util.regex.Pattern; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.AuthUtil; @@ -81,10 +79,6 @@ public class ZKWatcher implements Watcher, Abortable, Closeable { // listeners to be notified private final List listeners = new CopyOnWriteArrayList<>(); - // Used by ZKUtil:waitForZKConnectionIfAuthenticating to wait for SASL - // negotiation to complete - private CountDownLatch saslLatch = new CountDownLatch(1); - private final Configuration conf; /* A pattern that matches a Kerberos name, borrowed from Hadoop's KerberosName */ @@ -383,13 +377,32 @@ public String prefix(final String str) { */ public List getMetaReplicaNodes() throws KeeperException { List childrenOfBaseNode = ZKUtil.listChildrenNoWatch(this, znodePaths.baseZNode); + return filterMetaReplicaNodes(childrenOfBaseNode); + } + + /** + * Same as {@link #getMetaReplicaNodes()} except that this also registers a watcher on base znode + * for subsequent CREATE/DELETE operations on child nodes. + */ + public List getMetaReplicaNodesAndWatchChildren() throws KeeperException { + List childrenOfBaseNode = + ZKUtil.listChildrenAndWatchForNewChildren(this, znodePaths.baseZNode); + return filterMetaReplicaNodes(childrenOfBaseNode); + } + + /** + * @param nodes Input list of znodes + * @return Filtered list of znodes from nodes that belong to meta replica(s). + */ + private List filterMetaReplicaNodes(List nodes) { + if (nodes == null || nodes.isEmpty()) { + return new ArrayList<>(); + } List metaReplicaNodes = new ArrayList<>(2); - if (childrenOfBaseNode != null) { - String pattern = conf.get("zookeeper.znode.metaserver","meta-region-server"); - for (String child : childrenOfBaseNode) { - if (child.startsWith(pattern)) { - metaReplicaNodes.add(child); - } + String pattern = conf.get(ZNodePaths.META_ZNODE_PREFIX_CONF_KEY, ZNodePaths.META_ZNODE_PREFIX); + for (String child : nodes) { + if (child.startsWith(pattern)) { + metaReplicaNodes.add(child); } } return metaReplicaNodes;