diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java index c0e2994c3c5e..af014093ae7c 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java @@ -2265,6 +2265,20 @@ default boolean replicationPeerModificationSwitch(boolean on) throws IOException */ void decommissionRegionServers(List servers, boolean offload) throws IOException; + /** + * Mark region server(s) as decommissioned to prevent additional regions from getting assigned to + * them. Optionally unload the regions on the servers. If there are multiple servers to be + * decommissioned, decommissioning them at the same time can prevent wasteful region movements. + * Region unloading is asynchronous. + * @param servers The list of servers to decommission. + * @param offload True to offload the regions from the decommissioned servers + * @param matchHostNameOnly True to prevent the hostname from ever joining again, regardless of + * startTime + * @throws IOException if a remote or network exception occurs + */ + void decommissionRegionServers(List servers, boolean offload, + boolean matchHostNameOnly) throws IOException; + /** * List region servers marked as decommissioned, which can not be assigned regions. * @return List of decommissioned region servers. diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java index c13dfc33e3d2..8e14d7a2b272 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java @@ -929,6 +929,12 @@ public void decommissionRegionServers(List servers, boolean offload) get(admin.decommissionRegionServers(servers, offload)); } + @Override + public void decommissionRegionServers(List servers, boolean offload, + boolean matchHostNameOnly) throws IOException { + get(admin.decommissionRegionServers(servers, offload, matchHostNameOnly)); + } + @Override public List listDecommissionedRegionServers() throws IOException { return get(admin.listDecommissionedRegionServers()); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java index bdb0228d9687..242d2d7a6950 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java @@ -1146,6 +1146,9 @@ CompletableFuture isProcedureFinished(String signature, String instance */ CompletableFuture decommissionRegionServers(List servers, boolean offload); + CompletableFuture decommissionRegionServers(List servers, boolean offload, + boolean matchHostNameOnly); + /** * List region servers marked as decommissioned, which can not be assigned regions. * @return List of decommissioned region servers wrapped by {@link CompletableFuture} diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java index 69f353600036..73825132d04a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java @@ -635,6 +635,12 @@ public CompletableFuture decommissionRegionServers(List server return wrap(rawAdmin.decommissionRegionServers(servers, offload)); } + @Override + public CompletableFuture decommissionRegionServers(List servers, + boolean offload, boolean matchHostNameOnly) { + return wrap(rawAdmin.decommissionRegionServers(servers, offload, matchHostNameOnly)); + } + @Override public CompletableFuture> listDecommissionedRegionServers() { return wrap(rawAdmin.listDecommissionedRegionServers()); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java index 103a64e520a1..8de848e3ad39 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java @@ -2509,10 +2509,21 @@ public CompletableFuture getLocks() { @Override public CompletableFuture decommissionRegionServers(List servers, boolean offload) { + // By default, when we decommission a RegionServer we don't mark the hostname as permanently + // decommissioned and instead mark the server location (host + port + startCode) as such + boolean matchHostNameOnly = false; + + return this.decommissionRegionServers(servers, offload, matchHostNameOnly); + } + + @Override + public CompletableFuture decommissionRegionServers(List servers, + boolean offload, boolean matchHostNameOnly) { return this. newMasterCaller() .action((controller, stub) -> this. call(controller, stub, - RequestConverter.buildDecommissionRegionServersRequest(servers, offload), + RequestConverter.buildDecommissionRegionServersRequest(servers, offload, + matchHostNameOnly), (s, c, req, done) -> s.decommissionRegionServers(c, req, done), resp -> null)) .call(); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java index ce12aaea0d24..6151bb815120 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java @@ -1558,8 +1558,14 @@ public static GetQuotaStatesRequest buildGetQuotaStatesRequest() { public static DecommissionRegionServersRequest buildDecommissionRegionServersRequest(List servers, boolean offload) { + return RequestConverter.buildDecommissionRegionServersRequest(servers, offload, false); + } + + public static DecommissionRegionServersRequest buildDecommissionRegionServersRequest( + List servers, boolean offload, boolean matchHostNameOnly) { return DecommissionRegionServersRequest.newBuilder() - .addAllServerName(toProtoServerNames(servers)).setOffload(offload).build(); + .addAllServerName(toProtoServerNames(servers)).setOffload(offload) + .setMatchHostNameOnly(matchHostNameOnly).build(); } public static RecommissionRegionServerRequest diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto index a8adaa27453f..bd49b2bba5e2 100644 --- a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto @@ -696,6 +696,7 @@ message ListDecommissionedRegionServersResponse { message DecommissionRegionServersRequest { repeated ServerName server_name = 1; required bool offload = 2; + optional bool match_host_name_only = 3 [default = false]; } message DecommissionRegionServersResponse { diff --git a/hbase-protocol-shaded/src/main/protobuf/server/zookeeper/ZooKeeper.proto b/hbase-protocol-shaded/src/main/protobuf/server/zookeeper/ZooKeeper.proto index 17fa31ffbe69..5d166a1d4846 100644 --- a/hbase-protocol-shaded/src/main/protobuf/server/zookeeper/ZooKeeper.proto +++ b/hbase-protocol-shaded/src/main/protobuf/server/zookeeper/ZooKeeper.proto @@ -107,3 +107,7 @@ message DeprecatedTableState { message SwitchState { optional bool enabled = 1; } + +message DrainedZNodeServerData { + optional bool match_host_name_only = 1 [default = false]; +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DrainingServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DrainingServerTracker.java index 41f5709e911a..6d2be3e220cb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DrainingServerTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DrainingServerTracker.java @@ -26,11 +26,16 @@ import org.apache.hadoop.hbase.zookeeper.ZKListener; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; +import org.apache.hadoop.hbase.zookeeper.ZNodePaths; import org.apache.yetus.audience.InterfaceAudience; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.DrainedZNodeServerData; + /** * Tracks the list of draining region servers via ZK. *

@@ -77,36 +82,66 @@ public void serverAdded(ServerName sn) { } } }); + List servers = ZKUtil.listChildrenAndWatchThem(watcher, watcher.getZNodePaths().drainingZNode); - add(servers); + + if (servers != null) { + add(servers); + } } private void add(final List servers) throws IOException { synchronized (this.drainingServers) { - this.drainingServers.clear(); + // Clear all servers from the draining list that shouldn't stay drained. Information about + // whether a server should stay drained or not is added to the ZNode by the HMaster + this.drainingServers.removeIf(sn -> !shouldServerStayDrained(sn)); + for (String n : servers) { final ServerName sn = ServerName.valueOf(ZKUtil.getNodeName(n)); this.drainingServers.add(sn); this.serverManager.addServerToDrainList(sn); - LOG.info("Draining RS node created, adding to list [" + sn + "]"); - + LOG.info("Draining RS node created, adding to list [{}]", sn); } } } private void remove(final ServerName sn) { synchronized (this.drainingServers) { + if (shouldServerStayDrained(sn)) { + LOG.info( + "Refusing to remove drained RS {} from the list, it's marked as permanently drained", sn); + return; + } this.drainingServers.remove(sn); this.serverManager.removeServerFromDrainList(sn); + LOG.info("Successfully removed drained RS {} from the list", sn); } } + private boolean shouldServerStayDrained(final ServerName sn) { + boolean shouldBePermanentlyDecommissioned = false; + String parentZnode = this.watcher.getZNodePaths().drainingZNode; + String node = ZNodePaths.joinZNode(parentZnode, sn.getServerName()); + + try { + byte[] rawData = ZKUtil.getData(this.watcher, node); + // Check if the data is present for backwards compatibility, some nodes may not have it + if (rawData != null && rawData.length > 0) { + DrainedZNodeServerData znodeData = DrainedZNodeServerData.parseFrom(rawData); + shouldBePermanentlyDecommissioned = znodeData.getMatchHostNameOnly(); + } + } catch (InterruptedException | KeeperException | InvalidProtocolBufferException e) { + // pass + } + return shouldBePermanentlyDecommissioned; + } + @Override public void nodeDeleted(final String path) { if (path.startsWith(watcher.getZNodePaths().drainingZNode)) { final ServerName sn = ServerName.valueOf(ZKUtil.getNodeName(path)); - LOG.info("Draining RS node deleted, removing from list [" + sn + "]"); + LOG.info("Draining RS node deleted, removing from list [{}]", sn); remove(sn); } } @@ -117,10 +152,11 @@ public void nodeChildrenChanged(final String path) { try { final List newNodes = ZKUtil.listChildrenAndWatchThem(watcher, watcher.getZNodePaths().drainingZNode); - add(newNodes); - } catch (KeeperException e) { - abortable.abort("Unexpected zk exception getting RS nodes", e); - } catch (IOException e) { + + if (newNodes != null) { + add(newNodes); + } + } catch (KeeperException | IOException e) { abortable.abort("Unexpected zk exception getting RS nodes", e); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 88b82f01069e..eef4ad71a3cc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -282,6 +282,7 @@ import org.apache.hbase.thirdparty.com.google.common.io.Closeables; import org.apache.hbase.thirdparty.com.google.gson.JsonParseException; import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors; +import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.apache.hbase.thirdparty.com.google.protobuf.Service; import org.apache.hbase.thirdparty.org.eclipse.jetty.server.Server; import org.apache.hbase.thirdparty.org.eclipse.jetty.server.ServerConnector; @@ -293,6 +294,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.DrainedZNodeServerData; /** * HMaster is the "master server" for HBase. An HBase cluster has one active master. If many masters @@ -4017,13 +4019,27 @@ public boolean isReplicationPeerModificationEnabled() { */ public void decommissionRegionServers(final List servers, final boolean offload) throws IOException { + this.decommissionRegionServers(servers, offload, false); + } + + public void decommissionRegionServers(final List servers, final boolean offload, + final boolean matchHostNameOnly) throws IOException { List serversAdded = new ArrayList<>(servers.size()); // Place the decommission marker first. String parentZnode = getZooKeeper().getZNodePaths().drainingZNode; for (ServerName server : servers) { try { String node = ZNodePaths.joinZNode(parentZnode, server.getServerName()); - ZKUtil.createAndFailSilent(getZooKeeper(), node); + // Encode whether the host should be decommissioned permanently regardless of its + // port + startCode combination or not in the znode's data + if (matchHostNameOnly) { + LOG.info("Marking the host of '{}' as permanently decommissioned in ZooKeeper", + server.getServerName()); + } + byte[] data = DrainedZNodeServerData.newBuilder().setMatchHostNameOnly(matchHostNameOnly) + .build().toByteArray(); + // Create a node with binary data + ZKUtil.createAndFailSilent(getZooKeeper(), node, data); } catch (KeeperException ke) { throw new HBaseIOException( this.zooKeeper.prefix("Unable to decommission '" + server.getServerName() + "'."), ke); @@ -4068,8 +4084,31 @@ public void recommissionRegionServer(final ServerName server, // Remove the server from decommissioned (draining) server list. String parentZnode = getZooKeeper().getZNodePaths().drainingZNode; String node = ZNodePaths.joinZNode(parentZnode, server.getServerName()); + boolean shouldBePermanentlyDecommissioned = false; + + // Get the binary data in the node and check whether the hostname is marked as + // permanently decommissioned or not + try { + byte[] rawData = ZKUtil.getData(getZooKeeper(), node); + + // Check if the data is present for backwards compatibility, some nodes may not have it + if (rawData != null && rawData.length > 0) { + DrainedZNodeServerData znodeData = DrainedZNodeServerData.parseFrom(rawData); + shouldBePermanentlyDecommissioned = znodeData.getMatchHostNameOnly(); + } + } catch (InterruptedException | KeeperException | InvalidProtocolBufferException e) { + throw new HBaseIOException(this.zooKeeper.prefix("Unable to recommission " + + server.getServerName() + ", was unable to read the node's data"), e); + } + try { - ZKUtil.deleteNodeFailSilent(getZooKeeper(), node); + if (shouldBePermanentlyDecommissioned) { + LOG.info("Skipping recommissioning of server {} because it was marked as permanently" + + " decommissioned in ZooKeeper", server.getServerName()); + return; + } else { + ZKUtil.deleteNodeFailSilent(getZooKeeper(), node); + } } catch (KeeperException ke) { throw new HBaseIOException( this.zooKeeper.prefix("Unable to recommission '" + server.getServerName() + "'."), ke); @@ -4092,8 +4131,9 @@ public void recommissionRegionServer(final ServerName server, } RegionInfo hri = regionState.getRegion(); if (server.equals(regionState.getServerName())) { - LOG.info("Skipping move of region " + hri.getRegionNameAsString() - + " because region already assigned to the same server " + server + "."); + LOG.info( + "Skipping move of region {} because region already assigned to the same server {}.", + hri.getRegionNameAsString(), server); continue; } RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), server); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 1da8e03d179e..1116b3262220 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -2279,10 +2279,11 @@ public DecommissionRegionServersResponse decommissionRegionServers(RpcController List servers = request.getServerNameList().stream() .map(pbServer -> ProtobufUtil.toServerName(pbServer)).collect(Collectors.toList()); boolean offload = request.getOffload(); + boolean matchHostNameOnly = request.getMatchHostNameOnly(); if (server.cpHost != null) { server.cpHost.preDecommissionRegionServers(servers, offload); } - server.decommissionRegionServers(servers, offload); + server.decommissionRegionServers(servers, offload, matchHostNameOnly); if (server.cpHost != null) { server.cpHost.postDecommissionRegionServers(servers, offload); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 2afd48c58df5..e0c66130293b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -94,10 +94,10 @@ *

* If a sever is known not to be running any more, it is called dead. The dead server needs to be * handled by a ServerShutdownHandler. If the handler is not enabled yet, the server can't be - * handled right away so it is queued up. After the handler is enabled, the server will be submitted - * to a handler to handle. However, the handler may be just partially enabled. If so, the server - * cannot be fully processed, and be queued up for further processing. A server is fully processed - * only after the handler is fully enabled and has completed the handling. + * handled right away, so it is queued up. After the handler is enabled, the server will be + * submitted to a handler to handle. However, the handler may be just partially enabled. If so, the + * server cannot be fully processed, and be queued up for further processing. A server is fully + * processed only after the handler is fully enabled and has completed the handling. */ @InterfaceAudience.Private public class ServerManager { @@ -649,10 +649,9 @@ public synchronized void moveFromOnlineToDeadServers(final ServerName sn) { public synchronized boolean removeServerFromDrainList(final ServerName sn) { // Warn if the server (sn) is not online. ServerName is of the form: // , , - if (!this.isServerOnline(sn)) { - LOG.warn("Server " + sn + " is not currently online. " - + "Removing from draining list anyway, as requested."); + LOG.warn( + "Server {} is not currently online. Removing from draining list anyway, as requested.", sn); } // Remove the server from the draining servers lists. return this.drainingServers.remove(sn); @@ -667,18 +666,18 @@ public synchronized boolean addServerToDrainList(final ServerName sn) { // , , if (!this.isServerOnline(sn)) { - LOG.warn("Server " + sn + " is not currently online. " - + "Ignoring request to add it to draining list."); + LOG.warn("Server {} is not currently online. Ignoring request to add it to draining list.", + sn); return false; } // Add the server to the draining servers lists, if it's not already in // it. if (this.drainingServers.contains(sn)) { - LOG.warn("Server " + sn + " is already in the draining server list." - + "Ignoring request to add it again."); + LOG.warn("Server {} is already in the draining server list.Ignoring request to add it again.", + sn); return true; } - LOG.info("Server " + sn + " added to draining server list."); + LOG.info("Server {} added to draining server list.", sn); return this.drainingServers.add(sn); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin4.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin4.java index e52d8ee92c3c..b6ce94d72b9d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin4.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin4.java @@ -26,20 +26,26 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +import java.util.*; +import java.util.stream.Collectors; +import org.apache.hadoop.hbase.ClusterMetrics; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.replication.ReplicationPeerConfig; import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; import org.apache.hadoop.hbase.zookeeper.ZNodePaths; +import org.apache.zookeeper.KeeperException; import org.junit.ClassRule; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.DrainedZNodeServerData; + @Category({ MediumTests.class, ClientTests.class }) public class TestAdmin4 extends TestAdminBase { @ClassRule @@ -53,7 +59,7 @@ public void testDecommissionAndStopRegionServers() throws Exception { ArrayList clusterRegionServers = new ArrayList<>(ADMIN.getRegionServers(true)); - List serversToDecommission = new ArrayList(); + List serversToDecommission = new ArrayList<>(); serversToDecommission.add(clusterRegionServers.get(0)); // Decommission @@ -70,6 +76,145 @@ public void testDecommissionAndStopRegionServers() throws Exception { ZNodePaths.joinZNode(zkw.getZNodePaths().drainingZNode, serverName.getServerName()))); } + /** + * TestCase for HBASE-28342 + */ + @Test + public void testAsyncDecommissionRegionServersByHostNamesPermanently() throws Exception { + List decommissionedRegionServers = ADMIN.listDecommissionedRegionServers(); + assertTrue(decommissionedRegionServers.isEmpty()); + + final TableName tableName = TableName.valueOf(name.getMethodName()); + TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 6); + + ArrayList clusterRegionServers = + new ArrayList<>(ADMIN.getClusterMetrics(EnumSet.of(ClusterMetrics.Option.LIVE_SERVERS)) + .getLiveServerMetrics().keySet()); + + HashMap> serversToDecommission = new HashMap<>(); + // Get a server that has meta online. We will decommission two of the servers, + // leaving one online. + int i; + for (i = 0; i < clusterRegionServers.size(); i++) { + List regionsOnServer = ADMIN.getRegions(clusterRegionServers.get(i)); + if ( + ADMIN.getRegions(clusterRegionServers.get(i)).stream().anyMatch(RegionInfo::isMetaRegion) + ) { + serversToDecommission.put(clusterRegionServers.get(i), regionsOnServer); + break; + } + } + + clusterRegionServers.remove(i); + // Get another server to decommission. + serversToDecommission.put(clusterRegionServers.get(0), + ADMIN.getRegions(clusterRegionServers.get(0))); + + ServerName remainingServer = clusterRegionServers.get(1); + + // Decommission the servers with `matchHostNameOnly` set to `true` so that the hostnames are + // always maintained as decommissioned/drained + boolean matchHostNameOnly = true; + ADMIN.decommissionRegionServers(new ArrayList<>(serversToDecommission.keySet()), true, + matchHostNameOnly); + assertEquals(2, ADMIN.listDecommissionedRegionServers().size()); + + // Verify the regions have been off the decommissioned servers, all on the one + // remaining server. + for (ServerName server : serversToDecommission.keySet()) { + for (RegionInfo region : serversToDecommission.get(server)) { + TEST_UTIL.assertRegionOnServer(region, remainingServer, 10000); + } + } + + // Try to recommission the servers and assert that they remain decommissioned + // No regions should be loaded on them + recommissionRegionServers(serversToDecommission); + // Assert that the number of decommissioned servers is still 2! + assertEquals(2, ADMIN.listDecommissionedRegionServers().size()); + + // Verify that all regions are still on the remainingServer and not on the decommissioned + // servers + for (ServerName server : serversToDecommission.keySet()) { + for (RegionInfo region : serversToDecommission.get(server)) { + TEST_UTIL.assertRegionOnServer(region, remainingServer, 10000); + } + } + + // Cleanup ZooKeeper's state and recommission all servers for the rest of tests + markZnodeAsRecommissionable(serversToDecommission.keySet()); + recommissionRegionServers(serversToDecommission); + } + + /** + * TestCase for HBASE-28342 + */ + @Test + public void testAsyncDecommissionRegionServersSetsHostNameMatchDataFalseInZooKeeperAsExpected() + throws Exception { + assertTrue(ADMIN.listDecommissionedRegionServers().isEmpty()); + + ArrayList clusterRegionServers = + new ArrayList<>(ADMIN.getClusterMetrics(EnumSet.of(ClusterMetrics.Option.LIVE_SERVERS)) + .getLiveServerMetrics().keySet()); + + ServerName decommissionedRegionServer = clusterRegionServers.get(0); + clusterRegionServers.remove(0); + + // Decommission the servers with `matchHostNameOnly` set to `false` so that the hostnames are + // not always considered as decommissioned/drained + boolean expectedMatchHostNameOnly = false; + ADMIN.decommissionRegionServers(Collections.singletonList(decommissionedRegionServer), true, + expectedMatchHostNameOnly); + assertEquals(1, ADMIN.listDecommissionedRegionServers().size()); + + // Read the node's data in ZooKeeper and assert that it was set as expected + ZKWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); + String znodePath = ZNodePaths.joinZNode(zkw.getZNodePaths().drainingZNode, + decommissionedRegionServer.getServerName()); + DrainedZNodeServerData data = DrainedZNodeServerData.parseFrom(ZKUtil.getData(zkw, znodePath)); + assertEquals(expectedMatchHostNameOnly, data.getMatchHostNameOnly()); + + // Recommission the server + ADMIN.recommissionRegionServer(decommissionedRegionServer, new ArrayList<>()); + assertEquals(0, ADMIN.listDecommissionedRegionServers().size()); + } + + /** + * TestCase for HBASE-28342 + */ + @Test + public void testAsyncDecommissionRegionServersSetsHostNameMatchToTrueInZooKeeperAsExpected() + throws Exception { + assertTrue(ADMIN.listDecommissionedRegionServers().isEmpty()); + + ArrayList clusterRegionServers = + new ArrayList<>(ADMIN.getClusterMetrics(EnumSet.of(ClusterMetrics.Option.LIVE_SERVERS)) + .getLiveServerMetrics().keySet()); + + ServerName decommissionedRegionServer = clusterRegionServers.get(0); + clusterRegionServers.remove(0); + + // Decommission the servers with `matchHostNameOnly` set to `true` so that the hostnames are + // always considered as decommissioned/drained + boolean expectedMatchHostNameOnly = true; + ADMIN.decommissionRegionServers(Collections.singletonList(decommissionedRegionServer), true, + expectedMatchHostNameOnly); + assertEquals(1, ADMIN.listDecommissionedRegionServers().size()); + + // Read the node's data in ZooKeeper and assert that it was set as expected + ZKWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); + String znodePath = ZNodePaths.joinZNode(zkw.getZNodePaths().drainingZNode, + decommissionedRegionServer.getServerName()); + DrainedZNodeServerData data = DrainedZNodeServerData.parseFrom(ZKUtil.getData(zkw, znodePath)); + assertEquals(expectedMatchHostNameOnly, data.getMatchHostNameOnly()); + + // Reset the node's data in ZooKeeper in order to be able to recommission the server + markZnodeAsRecommissionable(Collections.singleton(decommissionedRegionServer)); + ADMIN.recommissionRegionServer(decommissionedRegionServer, new ArrayList<>()); + assertEquals(0, ADMIN.listDecommissionedRegionServers().size()); + } + @Test public void testReplicationPeerModificationSwitch() throws Exception { assertTrue(ADMIN.isReplicationPeerModificationEnabled()); @@ -89,4 +234,29 @@ public void testReplicationPeerModificationSwitch() throws Exception { ADMIN.replicationPeerModificationSwitch(true); } } + + private void recommissionRegionServers( + HashMap> decommissionedServers) throws IOException { + for (ServerName server : decommissionedServers.keySet()) { + List encodedRegionNames = decommissionedServers.get(server).stream() + .map(RegionInfo::getEncodedNameAsBytes).collect(Collectors.toList()); + ADMIN.recommissionRegionServer(server, encodedRegionNames); + } + } + + private void markZnodeAsRecommissionable(Set decommissionedServers) + throws IOException { + ZKWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); + for (ServerName serverName : decommissionedServers) { + String znodePath = + ZNodePaths.joinZNode(zkw.getZNodePaths().drainingZNode, serverName.getServerName()); + byte[] newData = + DrainedZNodeServerData.newBuilder().setMatchHostNameOnly(false).build().toByteArray(); + try { + ZKUtil.setData(zkw, znodePath, newData); + } catch (KeeperException e) { + throw new RuntimeException(e); + } + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDecommissionAdminApi.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDecommissionAdminApi.java index 659aa0d05c68..2834df3f383f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDecommissionAdminApi.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncDecommissionAdminApi.java @@ -20,22 +20,31 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.io.IOException; import java.util.ArrayList; import java.util.EnumSet; import java.util.HashMap; import java.util.List; +import java.util.Set; +import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; import org.apache.hadoop.hbase.ClusterMetrics.Option; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZKWatcher; +import org.apache.hadoop.hbase.zookeeper.ZNodePaths; +import org.apache.zookeeper.KeeperException; import org.junit.ClassRule; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos; + @RunWith(Parameterized.class) @Category({ ClientTests.class, MediumTests.class }) public class TestAsyncDecommissionAdminApi extends TestAsyncAdminBase { @@ -58,14 +67,14 @@ public void testAsyncDecommissionRegionServers() throws Exception { assertEquals(TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size(), clusterRegionServers.size()); - HashMap> serversToDecommssion = new HashMap<>(); + HashMap> serversToDecommission = new HashMap<>(); // Get a server that has regions. We will decommission one of the servers, // leaving one online. int i; for (i = 0; i < clusterRegionServers.size(); i++) { List regionsOnServer = admin.getRegions(clusterRegionServers.get(i)).get(); - if (regionsOnServer.size() > 0) { - serversToDecommssion.put(clusterRegionServers.get(i), regionsOnServer); + if (!regionsOnServer.isEmpty()) { + serversToDecommission.put(clusterRegionServers.get(i), regionsOnServer); break; } } @@ -74,13 +83,13 @@ public void testAsyncDecommissionRegionServers() throws Exception { ServerName remainingServer = clusterRegionServers.get(0); // Decommission - admin.decommissionRegionServers(new ArrayList(serversToDecommssion.keySet()), true) + admin.decommissionRegionServers(new ArrayList(serversToDecommission.keySet()), true) .get(); assertEquals(1, admin.listDecommissionedRegionServers().get().size()); // Verify the regions have been off the decommissioned servers, all on the remaining server. - for (ServerName server : serversToDecommssion.keySet()) { - for (RegionInfo region : serversToDecommssion.get(server)) { + for (ServerName server : serversToDecommission.keySet()) { + for (RegionInfo region : serversToDecommission.get(server)) { TEST_UTIL.assertRegionOnServer(region, remainingServer, 10000); } } @@ -91,17 +100,105 @@ public void testAsyncDecommissionRegionServers() throws Exception { TEST_UTIL.waitUntilNoRegionsInTransition(10000); // Recommission and load regions - for (ServerName server : serversToDecommssion.keySet()) { - List encodedRegionNames = serversToDecommssion.get(server).stream() - .map(region -> region.getEncodedNameAsBytes()).collect(Collectors.toList()); + for (ServerName server : serversToDecommission.keySet()) { + List encodedRegionNames = serversToDecommission.get(server).stream() + .map(RegionInfo::getEncodedNameAsBytes).collect(Collectors.toList()); admin.recommissionRegionServer(server, encodedRegionNames).get(); } assertTrue(admin.listDecommissionedRegionServers().get().isEmpty()); // Verify the regions have been moved to the recommissioned servers - for (ServerName server : serversToDecommssion.keySet()) { - for (RegionInfo region : serversToDecommssion.get(server)) { + for (ServerName server : serversToDecommission.keySet()) { + for (RegionInfo region : serversToDecommission.get(server)) { TEST_UTIL.assertRegionOnServer(region, server, 10000); } } } + + @Test + public void testAsyncDecommissionRegionServersByHostNamesPermanently() throws Exception { + admin.balancerSwitch(false, true); + List decommissionedRegionServers = admin.listDecommissionedRegionServers().get(); + assertTrue(decommissionedRegionServers.isEmpty()); + + TEST_UTIL.createMultiRegionTable(tableName, FAMILY, 4); + + ArrayList clusterRegionServers = new ArrayList<>(admin + .getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).get().getLiveServerMetrics().keySet()); + + assertEquals(TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size(), + clusterRegionServers.size()); + + HashMap> serversToDecommission = new HashMap<>(); + // Get a server that has regions. We will decommission one of the servers, + // leaving one online. + int i; + for (i = 0; i < clusterRegionServers.size(); i++) { + List regionsOnServer = admin.getRegions(clusterRegionServers.get(i)).get(); + if (!regionsOnServer.isEmpty()) { + serversToDecommission.put(clusterRegionServers.get(i), regionsOnServer); + break; + } + } + + clusterRegionServers.remove(i); + ServerName remainingServer = clusterRegionServers.get(0); + + // Decommission the server permanently, setting the `matchHostNameOnly` argument to `true` + boolean matchHostNameOnly = true; + admin.decommissionRegionServers(new ArrayList<>(serversToDecommission.keySet()), true, + matchHostNameOnly).get(); + assertEquals(1, admin.listDecommissionedRegionServers().get().size()); + + // Verify the regions have been off the decommissioned servers, all on the remaining server. + for (ServerName server : serversToDecommission.keySet()) { + for (RegionInfo region : serversToDecommission.get(server)) { + TEST_UTIL.assertRegionOnServer(region, remainingServer, 10000); + } + } + + // Maybe the TRSP is still not finished at master side, since the reportRegionTransition just + // updates the procedure store, and we still need to wake up the procedure and execute it in the + // procedure executor, which is asynchronous + TEST_UTIL.waitUntilNoRegionsInTransition(10000); + + // Try to recommission the server and assert that the server is still decommissioned + recommissionRegionServers(serversToDecommission); + assertEquals(1, admin.listDecommissionedRegionServers().get().size()); + + // Verify that the regions still belong to the remainingServer and not the decommissioned ones + for (ServerName server : serversToDecommission.keySet()) { + for (RegionInfo region : serversToDecommission.get(server)) { + TEST_UTIL.assertRegionOnServer(region, remainingServer, 10000); + } + } + + // Clean-up ZooKeeper's state and recommission all servers for the next parameterized test run + removeServersBinaryData(serversToDecommission.keySet()); + recommissionRegionServers(serversToDecommission); + } + + private void + recommissionRegionServers(HashMap> decommissionedServers) + throws ExecutionException, InterruptedException { + for (ServerName server : decommissionedServers.keySet()) { + List encodedRegionNames = decommissionedServers.get(server).stream() + .map(RegionInfo::getEncodedNameAsBytes).collect(Collectors.toList()); + admin.recommissionRegionServer(server, encodedRegionNames).get(); + } + } + + private void removeServersBinaryData(Set decommissionedServers) throws IOException { + ZKWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); + for (ServerName serverName : decommissionedServers) { + String znodePath = + ZNodePaths.joinZNode(zkw.getZNodePaths().drainingZNode, serverName.getServerName()); + byte[] newData = ZooKeeperProtos.DrainedZNodeServerData.newBuilder() + .setMatchHostNameOnly(false).build().toByteArray(); + try { + ZKUtil.setData(zkw, znodePath, newData); + } catch (KeeperException e) { + throw new RuntimeException(e); + } + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java index 35c868413e19..671331b5f5c7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java @@ -715,6 +715,11 @@ public void decommissionRegionServers(List servers, boolean offload) admin.decommissionRegionServers(servers, offload); } + public void decommissionRegionServers(List servers, boolean offload, + boolean matchHostNameOnly) throws IOException { + admin.decommissionRegionServers(servers, offload, matchHostNameOnly); + } + public List listDecommissionedRegionServers() throws IOException { return admin.listDecommissionedRegionServers(); } diff --git a/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java b/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java index 0eff84bba7c8..c7052f532f3b 100644 --- a/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java +++ b/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java @@ -1069,7 +1069,12 @@ public boolean isReplicationPeerEnabled(String peerId) throws IOException { @Override public void decommissionRegionServers(List servers, boolean offload) { throw new NotImplementedException("decommissionRegionServers not supported in ThriftAdmin"); + } + @Override + public void decommissionRegionServers(List servers, boolean offload, + boolean matchHostNameOnly) { + throw new NotImplementedException("decommissionRegionServers not supported in ThriftAdmin"); } @Override diff --git a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java index be8f25cc39b0..cdd046a2e3f1 100644 --- a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java +++ b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java @@ -874,6 +874,7 @@ private static void deleteNodeFailSilent(ZKWatcher zkw, DeleteNodeFailSilent dnf try { zkw.getRecoverableZooKeeper().delete(delete.getPath(), delete.getVersion()); } catch (KeeperException.NoNodeException nne) { + // no-op } catch (InterruptedException ie) { zkw.interruptedException(ie); }