diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java index 9177bc057f7c..3e6ec4e13e89 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hbase.ClusterMetrics.Option; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionLocation; @@ -87,7 +88,6 @@ import org.apache.hadoop.hbase.util.RegionSplitter; import org.apache.hadoop.hbase.zookeeper.EmptyWatcher; import org.apache.hadoop.hbase.zookeeper.ZKConfig; -import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.yetus.audience.InterfaceAudience; @@ -119,8 +119,35 @@ * * */ -@InterfaceAudience.Private -public final class Canary implements Tool { +@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) +public class Canary implements Tool, CanaryInterface { + + @Override + public int runRegionCanary(String[] targets) throws Exception { + String configuredReadTableTimeoutsStr = conf.get(HBASE_CANARY_REGION_READ_TABLE_TIMEOUT); + try { + if (configuredReadTableTimeoutsStr != null) { + populateReadTableTimeoutsMap(configuredReadTableTimeoutsStr); + } + } catch (IllegalArgumentException e) { + LOG.error("Constructing read table timeouts map failed ", e); + return USAGE_EXIT_CODE; + } + return runMonitor(targets); + } + + @Override + public int runRegionServerCanary(String[] targets) throws Exception { + regionServerMode = true; + return runMonitor(targets); + } + + @Override + public int runZookeeperCanary() throws Exception { + zookeeperMode = true; + 
return runMonitor(null); + } + /** * Sink interface used by the canary to output information */ @@ -583,13 +610,8 @@ public Void call() { private static final String CANARY_TABLE_FAMILY_NAME = "Test"; private Configuration conf = null; - private long interval = 0; private Sink sink = null; - private boolean useRegExp; - private long timeout = DEFAULT_TIMEOUT; - private boolean failOnError = true; - /** * True if we are to run in 'regionServer' mode. */ @@ -600,13 +622,6 @@ public Void call() { */ private boolean zookeeperMode = false; - private long permittedFailures = 0; - private boolean regionServerAllRegions = false; - private boolean writeSniffing = false; - private long configuredWriteTableTimeout = DEFAULT_TIMEOUT; - private boolean treatFailureAsError = false; - private TableName writeTableName = DEFAULT_WRITE_TABLE_NAME; - /** * This is a Map of table to timeout. The timeout is for reading all regions in the table; i.e. * we aggregate time to fetch each region and it needs to be less than this value else we @@ -614,6 +629,29 @@ public Void call() { */ private HashMap configuredReadTableTimeouts = new HashMap<>(); + public static final String HBASE_CANARY_REGIONSERVER_ALL_REGIONS + = "hbase.canary.regionserver_all_regions"; + + public static final String HBASE_CANARY_REGION_WRITE_SNIFFING + = "hbase.canary.region.write.sniffing"; + public static final String HBASE_CANARY_REGION_WRITE_TABLE_TIMEOUT + = "hbase.canary.region.write.table.timeout"; + public static final String HBASE_CANARY_REGION_WRITE_TABLE_NAME + = "hbase.canary.region.write.table.name"; + public static final String HBASE_CANARY_REGION_READ_TABLE_TIMEOUT + = "hbase.canary.region.read.table.timeout"; + + public static final String HBASE_CANARY_ZOOKEEPER_PERMITTED_FAILURES + = "hbase.canary.zookeeper.permitted.failures"; + + public static final String HBASE_CANARY_INTERVAL = "hbase.canary.interval"; + public static final String HBASE_CANARY_TREAT_FAILURE_AS_ERROR + = 
"hbase.canary.treat.failure.as.error"; + public static final String HBASE_CANARY_USE_REGEX = "hbase.canary.use.regex"; + public static final String HBASE_CANARY_TIMEOUT = "hbase.canary.timeout"; + public static final String HBASE_CANARY_FAIL_ON_ERROR = "hbase.canary.fail.on.error"; + + private ExecutorService executor; // threads to retrieve data from regionservers public Canary() { @@ -630,6 +668,11 @@ public Canary(ExecutorService executor) { this.sink = sink; } + Canary(Configuration conf, ExecutorService executor) { + this(executor); + setConf(conf); + } + @Override public Configuration getConf() { return conf; @@ -637,11 +680,18 @@ public Configuration getConf() { @Override public void setConf(Configuration conf) { + if (conf == null) { + conf = HBaseConfiguration.create(); + } this.conf = conf; } private int parseArgs(String[] args) { int index = -1; + long interval = 0, permittedFailures = 0; + boolean regionServerAllRegions = false, writeSniffing = false; + String readTableTimeoutsStr = null; + // Process command line args for (int i = 0; i < args.length; i++) { String cmd = args[i]; @@ -658,7 +708,8 @@ private int parseArgs(String[] args) { printUsageAndExit(); } else if (cmd.equals("-daemon") && interval == 0) { // user asked for daemon mode, set a default interval between checks - interval = DEFAULT_INTERVAL; + conf.setLong(HBASE_CANARY_INTERVAL, DEFAULT_INTERVAL); + } else if (cmd.equals("-interval")) { // user has specified an interval for canary breaths (-interval N) i++; @@ -674,18 +725,21 @@ private int parseArgs(String[] args) { System.err.println("-interval needs a numeric value argument."); printUsageAndExit(); } + conf.setLong(HBASE_CANARY_INTERVAL, interval); } else if (cmd.equals("-zookeeper")) { this.zookeeperMode = true; } else if(cmd.equals("-regionserver")) { this.regionServerMode = true; } else if(cmd.equals("-allRegions")) { - this.regionServerAllRegions = true; + conf.setBoolean(HBASE_CANARY_REGIONSERVER_ALL_REGIONS, true); + 
regionServerAllRegions = true; } else if(cmd.equals("-writeSniffing")) { - this.writeSniffing = true; + writeSniffing = true; + conf.setBoolean(HBASE_CANARY_REGION_WRITE_SNIFFING, true); } else if(cmd.equals("-treatFailureAsError") || cmd.equals("-failureAsError")) { - this.treatFailureAsError = true; + conf.setBoolean(HBASE_CANARY_TREAT_FAILURE_AS_ERROR, true); } else if (cmd.equals("-e")) { - this.useRegExp = true; + conf.setBoolean(HBASE_CANARY_USE_REGEX, true); } else if (cmd.equals("-t")) { i++; @@ -693,13 +747,14 @@ private int parseArgs(String[] args) { System.err.println("-t takes a numeric milliseconds value argument."); printUsageAndExit(); } - + long timeout = 0; try { - this.timeout = Long.parseLong(args[i]); + timeout = Long.parseLong(args[i]); } catch (NumberFormatException e) { System.err.println("-t takes a numeric milliseconds value argument."); printUsageAndExit(); } + conf.setLong(HBASE_CANARY_TIMEOUT, timeout); } else if(cmd.equals("-writeTableTimeout")) { i++; @@ -707,13 +762,14 @@ private int parseArgs(String[] args) { System.err.println("-writeTableTimeout takes a numeric milliseconds value argument."); printUsageAndExit(); } - + long configuredWriteTableTimeout = 0; try { - this.configuredWriteTableTimeout = Long.parseLong(args[i]); + configuredWriteTableTimeout = Long.parseLong(args[i]); } catch (NumberFormatException e) { System.err.println("-writeTableTimeout takes a numeric milliseconds value argument."); printUsageAndExit(); } + conf.setLong(HBASE_CANARY_REGION_WRITE_TABLE_TIMEOUT, configuredWriteTableTimeout); } else if (cmd.equals("-writeTable")) { i++; @@ -721,7 +777,7 @@ private int parseArgs(String[] args) { System.err.println("-writeTable takes a string tablename value argument."); printUsageAndExit(); } - this.writeTableName = TableName.valueOf(args[i]); + conf.set(HBASE_CANARY_REGION_WRITE_TABLE_NAME, args[i]); } else if (cmd.equals("-f")) { i++; @@ -731,7 +787,7 @@ private int parseArgs(String[] args) { printUsageAndExit(); } - 
this.failOnError = Boolean.parseBoolean(args[i]); + conf.setBoolean(HBASE_CANARY_FAIL_ON_ERROR, Boolean.parseBoolean(args[i])); } else if (cmd.equals("-readTableTimeouts")) { i++; @@ -740,23 +796,8 @@ private int parseArgs(String[] args) { "millisecond timeouts per table (without spaces)."); printUsageAndExit(); } - String [] tableTimeouts = args[i].split(","); - for (String tT: tableTimeouts) { - String [] nameTimeout = tT.split("="); - if (nameTimeout.length < 2) { - System.err.println("Each -readTableTimeouts argument must be of the form " + - "= (without spaces)."); - printUsageAndExit(); - } - long timeoutVal = 0L; - try { - timeoutVal = Long.parseLong(nameTimeout[1]); - } catch (NumberFormatException e) { - System.err.println("-readTableTimeouts read timeout for each table must be a numeric value argument."); - printUsageAndExit(); - } - this.configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal); - } + readTableTimeoutsStr = args[i]; + conf.set(HBASE_CANARY_REGION_READ_TABLE_TIMEOUT, readTableTimeoutsStr); } else if (cmd.equals("-permittedZookeeperFailures")) { i++; @@ -765,11 +806,12 @@ private int parseArgs(String[] args) { printUsageAndExit(); } try { - this.permittedFailures = Long.parseLong(args[i]); + permittedFailures = Long.parseLong(args[i]); } catch (NumberFormatException e) { System.err.println("-permittedZookeeperFailures needs a numeric value argument."); printUsageAndExit(); } + conf.setLong(HBASE_CANARY_ZOOKEEPER_PERMITTED_FAILURES, permittedFailures); } else { // no options match System.err.println(cmd + " options is invalid."); @@ -780,22 +822,22 @@ private int parseArgs(String[] args) { index = i; } } - if (this.regionServerAllRegions && !this.regionServerMode) { + if (regionServerAllRegions && !this.regionServerMode) { System.err.println("-allRegions can only be specified in regionserver mode."); printUsageAndExit(); } if (this.zookeeperMode) { - if (this.regionServerMode || this.regionServerAllRegions || this.writeSniffing) { + if 
(this.regionServerMode || regionServerAllRegions || writeSniffing) { System.err.println("-zookeeper is exclusive and cannot be combined with " + "other modes."); printUsageAndExit(); } } - if (this.permittedFailures != 0 && !this.zookeeperMode) { + if (permittedFailures != 0 && !this.zookeeperMode) { System.err.println("-permittedZookeeperFailures requires -zookeeper mode."); printUsageAndExit(); } - if (!this.configuredReadTableTimeouts.isEmpty() && (this.regionServerMode || this.zookeeperMode)) { + if (readTableTimeoutsStr != null && (this.regionServerMode || this.zookeeperMode)) { System.err.println("-readTableTimeouts can only be configured in region mode."); printUsageAndExit(); } @@ -805,6 +847,24 @@ private int parseArgs(String[] args) { @Override public int run(String[] args) throws Exception { int index = parseArgs(args); + String[] monitorTargets = null; + + if (index >= 0) { + int length = args.length - index; + monitorTargets = new String[length]; + System.arraycopy(args, index, monitorTargets, 0, length); + } + + if (zookeeperMode) { + return runZookeeperCanary(); + } else if (regionServerMode) { + return runRegionServerCanary(monitorTargets); + } else { + return runRegionCanary(monitorTargets); + } + } + + private int runMonitor(String[] monitorTargets) throws Exception { ChoreService choreService = null; // Launches chore for refreshing kerberos credentials if security is enabled. @@ -818,15 +878,18 @@ public int run(String[] args) throws Exception { // Start to prepare the stuffs Monitor monitor = null; - Thread monitorThread = null; + Thread monitorThread; long startTime = 0; long currentTimeLength = 0; + boolean failOnError = conf.getBoolean(HBASE_CANARY_FAIL_ON_ERROR, true); + long timeout = conf.getLong(HBASE_CANARY_TIMEOUT, DEFAULT_TIMEOUT); + long interval = conf.getLong(HBASE_CANARY_INTERVAL, 0); // Get a connection to use in below. try (Connection connection = ConnectionFactory.createConnection(this.conf)) { do { // Do monitor !! 
try { - monitor = this.newMonitor(connection, index, args); + monitor = this.newMonitor(connection, monitorTargets); monitorThread = new Thread(monitor, "CanaryMonitor-" + System.currentTimeMillis()); startTime = System.currentTimeMillis(); monitorThread.start(); @@ -834,7 +897,7 @@ public int run(String[] args) throws Exception { // wait for 1 sec Thread.sleep(1000); // exit if any error occurs - if (this.failOnError && monitor.hasError()) { + if (failOnError && monitor.hasError()) { monitorThread.interrupt(); if (monitor.initialized) { return monitor.errorCode; @@ -843,9 +906,9 @@ public int run(String[] args) throws Exception { } } currentTimeLength = System.currentTimeMillis() - startTime; - if (currentTimeLength > this.timeout) { + if (currentTimeLength > timeout) { LOG.error("The monitor is running too long (" + currentTimeLength - + ") after timeout limit:" + this.timeout + + ") after timeout limit:" + timeout + " will be killed itself !!"); if (monitor.initialized) { return TIMEOUT_ERROR_EXIT_CODE; @@ -855,7 +918,7 @@ public int run(String[] args) throws Exception { } } - if (this.failOnError && monitor.finalCheckForErrors()) { + if (failOnError && monitor.finalCheckForErrors()) { monitorThread.interrupt(); return monitor.errorCode; } @@ -873,6 +936,7 @@ public int run(String[] args) throws Exception { return monitor.errorCode; } + public Map getReadFailures() { return sink.getReadFailures(); } @@ -933,43 +997,65 @@ Sink getSink(Configuration configuration, Class clazz) { /** * A Factory method for {@link Monitor}. * Makes a RegionServerMonitor, or a ZooKeeperMonitor, or a RegionMonitor. 
- * @param index a start index for monitor target - * @param args args passed from user * @return a Monitor instance */ - public Monitor newMonitor(final Connection connection, int index, String[] args) { - Monitor monitor = null; - String[] monitorTargets = null; - - if (index >= 0) { - int length = args.length - index; - monitorTargets = new String[length]; - System.arraycopy(args, index, monitorTargets, 0, length); - } + private Monitor newMonitor(final Connection connection, String[] monitorTargets) { + Monitor monitor; + boolean useRegExp = conf.getBoolean(HBASE_CANARY_USE_REGEX, false); + boolean regionServerAllRegions + = conf.getBoolean(HBASE_CANARY_REGIONSERVER_ALL_REGIONS, false); + boolean treatFailureAsError + = conf.getBoolean(HBASE_CANARY_TREAT_FAILURE_AS_ERROR, false); + long permittedFailures + = conf.getLong(HBASE_CANARY_ZOOKEEPER_PERMITTED_FAILURES, 0); + boolean writeSniffing + = conf.getBoolean(HBASE_CANARY_REGION_WRITE_SNIFFING, false); + String writeTableName = conf.get(HBASE_CANARY_REGION_WRITE_TABLE_NAME, + DEFAULT_WRITE_TABLE_NAME.getNameAsString()); + long configuredWriteTableTimeout + = conf.getLong(HBASE_CANARY_REGION_WRITE_TABLE_TIMEOUT, DEFAULT_TIMEOUT); if (this.regionServerMode) { monitor = - new RegionServerMonitor(connection, monitorTargets, this.useRegExp, + new RegionServerMonitor(connection, monitorTargets, useRegExp, getSink(connection.getConfiguration(), RegionServerStdOutSink.class), - this.executor, this.regionServerAllRegions, - this.treatFailureAsError, this.permittedFailures); + this.executor, regionServerAllRegions, + treatFailureAsError, permittedFailures); + } else if (this.zookeeperMode) { monitor = - new ZookeeperMonitor(connection, monitorTargets, this.useRegExp, + new ZookeeperMonitor(connection, monitorTargets, useRegExp, getSink(connection.getConfiguration(), ZookeeperStdOutSink.class), - this.executor, this.treatFailureAsError, - this.permittedFailures); + this.executor, treatFailureAsError, permittedFailures); }
else { monitor = - new RegionMonitor(connection, monitorTargets, this.useRegExp, + new RegionMonitor(connection, monitorTargets, useRegExp, getSink(connection.getConfiguration(), RegionStdOutSink.class), - this.executor, this.writeSniffing, - this.writeTableName, this.treatFailureAsError, this.configuredReadTableTimeouts, - this.configuredWriteTableTimeout, this.permittedFailures); + this.executor, writeSniffing, + TableName.valueOf(writeTableName), treatFailureAsError, configuredReadTableTimeouts, + configuredWriteTableTimeout, permittedFailures); } return monitor; } + private void populateReadTableTimeoutsMap(String configuredReadTableTimeoutsStr) { + String[] tableTimeouts = configuredReadTableTimeoutsStr.split(","); + for (String tT : tableTimeouts) { + String[] nameTimeout = tT.split("="); + if (nameTimeout.length < 2) { + throw new IllegalArgumentException("Each -readTableTimeouts argument must be of the form " + + "= (without spaces)."); + } + long timeoutVal; + try { + timeoutVal = Long.parseLong(nameTimeout[1]); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("-readTableTimeouts read timeout for each table" + + " must be a numeric value argument.", e); + } + configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal); + } + } /** * A Monitor super-class can be extended by users */ @@ -1628,13 +1714,10 @@ private Map> doFilterRegionServerByName( public static void main(String[] args) throws Exception { final Configuration conf = HBaseConfiguration.create(); - // Loading the generic options to conf - new GenericOptionsParser(conf, args); - int numThreads = conf.getInt("hbase.canary.threads.num", MAX_THREADS_NUM); LOG.info("Execution thread count={}", numThreads); - int exitCode = 0; + int exitCode; ExecutorService executor = new ScheduledThreadPoolExecutor(numThreads); try { exitCode = ToolRunner.run(conf, new Canary(executor), args); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryInterface.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryInterface.java
new file mode 100644
index 000000000000..c96a19a16fd4
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryInterface.java
@@ -0,0 +1,71 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.tool;
+
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import java.util.concurrent.ExecutorService;
+
+/**
+ * Programmatic entry point to the Canary tool. Tuning options are read from the
+ * {@link Configuration} given to {@link #create(Configuration, ExecutorService)}, and each
+ * method returns the exit code the command-line tool reports for that mode.
+ */
+@InterfaceAudience.Public
+public interface CanaryInterface {
+
+  /**
+   * Creates a canary backed by the given configuration and executor.
+   * @param conf configuration holding the canary settings; a default HBase configuration is
+   *          created when this is {@code null}
+   * @param executor executor handed to the canary monitors
+   * @return a new canary instance
+   */
+  static CanaryInterface create(Configuration conf, ExecutorService executor) {
+    return new Canary(conf, executor);
+  }
+
+  /**
+   * Runs Canary in Region mode.
+   * @param targets the tables to monitor; may be {@code null} when none are named explicitly
+   * @return the exit code of the Canary tool; the usage exit code when the configured
+   *         read table timeouts cannot be parsed
+   * @throws Exception if the canary run fails
+   */
+  int runRegionCanary(String[] targets) throws Exception;
+
+  /**
+   * Runs Canary in Region server mode.
+   * @param targets the region servers to monitor; may be {@code null} when none are named
+   *          explicitly
+   * @return the exit code of the Canary tool
+   * @throws Exception if the canary run fails
+   */
+  int runRegionServerCanary(String[] targets) throws Exception;
+
+  /**
+   * Runs Canary in Zookeeper mode.
+   * @return the exit code of the Canary tool
+   * @throws Exception if the canary run fails
+   */
+  int runZookeeperCanary() throws Exception;
+}