apache · ndimiduk · May 14, 2020 · Apr 27, 2020 · Apr 27, 2020 · Apr 27, 2020
diff --git a/.gitignore b/.gitignore
@@ -23,3 +23,4 @@ linklint/
 .java-version
 *.log
 **/*.log
+tmp
diff --git a/conf/hbase-site.xml b/conf/hbase-site.xml
@@ -1,8 +1,7 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 <!--
-/**
- *
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -21,4 +20,35 @@
  */
 -->
 <configuration>
+  <!--
+    The following properties are set for running HBase as a single process on a
+    developer workstation. With this configuration, HBase is running in
+    "stand-alone" mode and without a distributed file system. In this mode, and
+    without further configuration, HBase and ZooKeeper data are stored on the
+    local filesystem, in a path under the value configured for `hbase.tmp.dir`.
+    This value is overridden from its default, a path under `/tmp`, because many
+    systems clean `/tmp` on a regular basis. Instead, it points to a path within
+    this HBase installation directory.
+
+    Running against the `LocalFileSystem`, as opposed to a distributed
+    filesystem, runs the risk of data integrity issues and data loss. Normally
+    HBase will refuse to run in such an environment. Setting
+    `hbase.unsafe.stream.capability.enforce` to `false` overrides this behavior,
+    permitting operation. This configuration is for the developer workstation
+    only and __should not be used in production!__
+
+    See also https://hbase.apache.org/book.html#standalone_dist
+  -->
+  <property>
+    <name>hbase.cluster.distributed</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hbase.tmp.dir</name>
+    <value>${env.HBASE_HOME:-.}/tmp</value>
+  </property>
+  <property>
+    <name>hbase.unsafe.stream.capability.enforce</name>
+    <value>false</value>
+  </property>
 </configuration>
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java
@@ -28,19 +28,20 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
+import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FSDataOutputStreamBuilder;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -353,14 +354,6 @@ public static FileSystem getWALFileSystem(final Configuration c) throws IOExcept
     if (enforceStreamCapability != null) {
       fs.getConf().set(UNSAFE_STREAM_CAPABILITY_ENFORCE, enforceStreamCapability);
     }
-    if (fs instanceof LocalFileSystem) {
-      // running on LocalFileSystem, which does not support the required capabilities `HSYNC`
-      // and `HFLUSH`. disable enforcement.
-      final boolean value = false;
-      LOG.warn("Cannot enforce durability guarantees while running on {}. Setting {}={} for"
-        + " this FileSystem.", fs.getUri(), UNSAFE_STREAM_CAPABILITY_ENFORCE, value);
-      fs.getConf().setBoolean(UNSAFE_STREAM_CAPABILITY_ENFORCE, value);
-    }
     return fs;
   }
 

diff --git a/src/main/asciidoc/_chapters/getting_started.adoc b/src/main/asciidoc/_chapters/getting_started.adoc
@@ -55,80 +55,34 @@ See <<java,Java>> for information about supported JDK versions.
 . Choose a download site from this list of link:https://www.apache.org/dyn/closer.lua/hbase/[Apache Download Mirrors].
   Click on the suggested top link.
   This will take you to a mirror of _HBase Releases_.
-  Click on the folder named _stable_ and then download the binary file that looks like
-  _hbase-<version>-bin.tar.gz_.
+  Click on the folder named _stable_ and then download the binary file that ends in _.tar.gz_ to your local filesystem.
+  Do not download the file ending in _src.tar.gz_ for now.
 
-. Extract the downloaded file and change to the newly-created directory.
+. Extract the downloaded file, and change to the newly-created directory.
 +
+[source,subs="attributes"]
 ----
-$ tar xzvf hbase-<version>-bin.tar.gz
-$ cd hbase-<version>/
+
+$ tar xzvf hbase-{Version}-bin.tar.gz
+$ cd hbase-{Version}/
 ----
 
-. Set the `JAVA_HOME` environment variable in _conf/hbase-env.sh_.
-  First, locate the installation of `java` on your machine. On Unix systems, you can use the
-  _whereis java_ command. Once you have the location, edit _conf/hbase-env.sh_ file, found inside
-  the extracted _hbase-<version>_ directory, uncomment the line starting with `#export JAVA_HOME=`,
-  and then set it to your Java installation path.
+. You must set the `JAVA_HOME` environment variable before starting HBase.
+  To make this easier, HBase lets you set it within the _conf/hbase-env.sh_ file. You must locate where Java is
+  installed on your machine, and one way to find this is by using the _whereis java_ command. Once you have the location,
+  edit the _conf/hbase-env.sh_ file and uncomment the line starting with _#export JAVA_HOME=_, and then set it to your Java installation path.
 +
-.Example extract from _conf/hbase-env.sh_ where `JAVA_HOME` is set
+.Example extract from _hbase-env.sh_ where _JAVA_HOME_ is set
   # Set environment variables here.
   # The java implementation to use.
   export JAVA_HOME=/usr/jdk64/jdk1.8.0_112
 +
-
-. Optionally set the <<hbase.tmp.dir,`hbase.tmp.dir`>> property in _conf/hbase-site.xml_.
-  At this time, you may consider changing the location on the local filesystem where HBase writes
-  its application data and the data  written by its embedded ZooKeeper  instance. By default, HBase
-  uses paths under <<hbase.tmp.dir,`hbase.tmp.dir`>> for these directories.
-+
-NOTE: On most systems, this is a path created under _/tmp_. Many system periodically delete the
-  contents of _/tmp_. If you start working with HBase in this way, and then return after the
-  cleanup operation takes place, you're likely to find strange errors. The following
-  configuration will place HBase's runtime data in a _tmp_ directory found inside the extracted
-  _hbase-<version>_ directory, where it will be safe from this periodic cleanup.
-+
-Open _conf/hbase-site.xml_ and paste the `<property>` tags between the empty `<configuration>`
-tags.
-+
-.Example _hbase-site.xml_ for Standalone HBase
-====
-[source,xml]
-----
-<configuration>
-  <property>
-    <name>hbase.tmp.dir</name>
-    <value>tmp</value>
-  </property>
-</configuration>
-----
-====
-+
-You do not need to create the HBase _tmp_ directory; HBase will do this for you.
-+
-NOTE: When unconfigured, HBase uses <<hbase.tmp.dir,`hbase.tmp.dir`>> as a starting point for many
-important configurations. Notable among them are <<hbase.rootdir,`hbase.rootdir`>>, the path under
-which HBase stores its data. You can specify values for this configuration directly, as you'll see
-in the subsequent sections.
-+
-NOTE: In this example, HBase is running on Hadoop's `LocalFileSystem`. That abstraction doesn't
-provide the durability promises that HBase needs to operate safely. This is most likely acceptable
-for local development and testing use cases. It is not appropriate for production deployments;
-eventually you will lose data. Instead, ensure your production deployment sets
-<<hbase.rootdir,`hbase.rootdir`>> to a durable `FileSystem` implementation.
-
 . The _bin/start-hbase.sh_ script is provided as a convenient way to start HBase.
   Issue the command, and if all goes well, a message is logged to standard output showing that HBase started successfully.
   You can use the `jps` command to verify that you have one running process called `HMaster`.
   In standalone mode HBase runs all daemons within this single JVM, i.e.
   the HMaster, a single HRegionServer, and the ZooKeeper daemon.
   Go to _http://localhost:16010_ to view the HBase Web UI.
-+
-NOTE: Java needs to be installed and available.
-If you get an error indicating that Java is not installed,
-but it is on your system, perhaps in a non-standard location,
-edit the _conf/hbase-env.sh_ file and modify the `JAVA_HOME`
-setting to point to the directory that contains _bin/java_ on your system.
 
 
 [[shell_exercises]]
@@ -288,23 +242,28 @@ The above has shown you how to start and stop a standalone instance of HBase.
 In the next sections we give a quick overview of other modes of hbase deploy.
 
 [[quickstart_pseudo]]
-=== Pseudo-Distributed Local Install
-
-After working your way through the <<quickstart,quickstart>> using standalone mode, you can
-re-configure HBase to run in pseudo-distributed mode. Pseudo-distributed mode means that HBase
-still runs completely on a single host, but each HBase daemon (HMaster, HRegionServer, and
-ZooKeeper) runs as a separate process. Previously in <<quickstart,standalone mode>>, all these
-daemons ran in a single jvm process, and your data was stored under
-<<hbase.tmp.dir,`hbase.tmp.dir`>>. In this walk-through, your data will be stored in in HDFS
-instead, assuming you have HDFS available. This is optional; you can skip the HDFS configuration
-to continue storing your data in the local filesystem.
+=== Pseudo-Distributed for Local Testing
+
+After working your way through the <<quickstart,quickstart>> in standalone mode,
+you can re-configure HBase to run in pseudo-distributed mode.
+Pseudo-distributed mode means that HBase still runs completely on a single host,
+but each HBase daemon (HMaster, HRegionServer, and ZooKeeper) runs as a separate process:
+in standalone mode, all daemons ran in a single JVM process.
+By default, unless you configure the `hbase.rootdir` property as described in
+<<quickstart,quickstart>>, your data is still stored on the local filesystem, under the path configured for `hbase.tmp.dir`.
+In this walk-through, we store your data in HDFS instead, assuming you have HDFS available.
+You can skip the HDFS configuration to continue storing your data in the local filesystem.
 
 .Hadoop Configuration
-NOTE: This procedure assumes that you have configured Hadoop and HDFS on your local system and/or a
-remote system, and that they are running and available. It also assumes you are using Hadoop 2.
+[NOTE]
+====
+This procedure assumes that you have configured Hadoop and HDFS on your local system and/or a remote
+system, and that they are running and available. It also assumes you are using Hadoop 2.
 The guide on
 link:https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html[Setting up a Single Node Cluster]
 in the Hadoop documentation is a good starting point.
+====
+
 
 . Stop HBase if it is running.
 +
@@ -325,7 +284,7 @@ First, add the following property which directs HBase to run in distributed mode
 </property>
 ----
 +
-Next, add a configuration for `hbase.rootdir` so that it points to the address of your HDFS instance, using the `hdfs:////` URI syntax.
+Next, add a configuration for `hbase.rootdir`, pointing to the address of your HDFS instance, using the `hdfs://` URI syntax.
 In this example, HDFS is running on the localhost at port 8020.
 +
 [source,xml]
@@ -337,10 +296,11 @@ In this example, HDFS is running on the localhost at port 8020.
 </property>
 ----
 +
-You do not need to create the directory in HDFS; HBase will do this for you.
-If you create the directory, HBase will attempt to do a migration, which is not what you want.
+You do not need to create the directory in HDFS.
+HBase will do this for you. If you create the directory, HBase will attempt to do a migration, which is not what you want.
 +
-Finally, remove the configuration for `hbase.tmp.dir`.
+Finally, remove any existing configuration for `hbase.tmp.dir` and `hbase.unsafe.stream.capability.enforce`.
+
 . Start HBase.
 +
 Use the _bin/start-hbase.sh_ command to start HBase.
@@ -426,7 +386,7 @@ You can stop HBase the same way as in the <<quickstart,quickstart>> procedure, u
 
 
 [[quickstart_fully_distributed]]
-=== Advanced - Fully Distributed
+=== Fully Distributed for Production
 
 In reality, you need a fully-distributed configuration to fully test HBase and to use it in real-world scenarios.
 In a distributed configuration, the cluster contains multiple nodes, each of which runs one or more HBase daemon.
-Original file line number
+Diff line change
@@ Expand Up / @@ -23,3 +23,4 @@ linklint/ @@
     .java-version
     *.log
     **/*.log
+    tmp