diff --git a/.gitignore b/.gitignore index 5b33192f8c93..50dfdfb69e53 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ linklint/ .java-version *.log **/*.log +tmp diff --git a/conf/hbase-site.xml b/conf/hbase-site.xml index c516ac729114..48b78ec72d07 100644 --- a/conf/hbase-site.xml +++ b/conf/hbase-site.xml @@ -1,8 +1,7 @@ + + + hbase.cluster.distributed + false + + + hbase.tmp.dir + ${env.HBASE_HOME:-.}/tmp + + + hbase.unsafe.stream.capability.enforce + false + diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java index 8b6ba2d31422..f6ff8fa620b8 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java @@ -28,12 +28,12 @@ import java.util.Locale; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStreamBuilder; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; @@ -41,6 +41,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.ipc.RemoteException; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -353,14 +354,6 @@ public static FileSystem getWALFileSystem(final Configuration c) throws IOExcept if (enforceStreamCapability != null) { fs.getConf().set(UNSAFE_STREAM_CAPABILITY_ENFORCE, enforceStreamCapability); } - if (fs instanceof LocalFileSystem) { - // running on 
LocalFileSystem, which does not support the required capabilities `HSYNC` - // and `HFLUSH`. disable enforcement. - final boolean value = false; - LOG.warn("Cannot enforce durability guarantees while running on {}. Setting {}={} for" - + " this FileSystem.", fs.getUri(), UNSAFE_STREAM_CAPABILITY_ENFORCE, value); - fs.getConf().setBoolean(UNSAFE_STREAM_CAPABILITY_ENFORCE, value); - } return fs; } diff --git a/src/main/asciidoc/_chapters/getting_started.adoc b/src/main/asciidoc/_chapters/getting_started.adoc index c092ebcc98e5..9e4aa8c069ba 100644 --- a/src/main/asciidoc/_chapters/getting_started.adoc +++ b/src/main/asciidoc/_chapters/getting_started.adoc @@ -55,80 +55,34 @@ See <> for information about supported JDK versions. . Choose a download site from this list of link:https://www.apache.org/dyn/closer.lua/hbase/[Apache Download Mirrors]. Click on the suggested top link. This will take you to a mirror of _HBase Releases_. - Click on the folder named _stable_ and then download the binary file that looks like - _hbase--bin.tar.gz_. + Click on the folder named _stable_ and then download the binary file that ends in _.tar.gz_ to your local filesystem. + Do not download the file ending in _src.tar.gz_ for now. -. Extract the downloaded file and change to the newly-created directory. +. Extract the downloaded file, and change to the newly-created directory. + +[source,subs="attributes"] ---- -$ tar xzvf hbase--bin.tar.gz -$ cd hbase-/ + +$ tar xzvf hbase-{Version}-bin.tar.gz +$ cd hbase-{Version}/ ---- -. Set the `JAVA_HOME` environment variable in _conf/hbase-env.sh_. - First, locate the installation of `java` on your machine. On Unix systems, you can use the - _whereis java_ command. Once you have the location, edit _conf/hbase-env.sh_ file, found inside - the extracted _hbase-_ directory, uncomment the line starting with `#export JAVA_HOME=`, - and then set it to your Java installation path. +. 
You must set the `JAVA_HOME` environment variable before starting HBase. + To make this easier, HBase lets you set it within the _conf/hbase-env.sh_ file. You must locate where Java is + installed on your machine, and one way to find this is by using the _whereis java_ command. Once you have the location, + edit the _conf/hbase-env.sh_ file and uncomment the line starting with _#export JAVA_HOME=_, and then set it to your Java installation path. + -.Example extract from _conf/hbase-env.sh_ where `JAVA_HOME` is set +.Example extract from _hbase-env.sh_ where _JAVA_HOME_ is set # Set environment variables here. # The java implementation to use. export JAVA_HOME=/usr/jdk64/jdk1.8.0_112 + - -. Optionally set the <> property in _conf/hbase-site.xml_. - At this time, you may consider changing the location on the local filesystem where HBase writes - its application data and the data written by its embedded ZooKeeper instance. By default, HBase - uses paths under <> for these directories. -+ -NOTE: On most systems, this is a path created under _/tmp_. Many system periodically delete the - contents of _/tmp_. If you start working with HBase in this way, and then return after the - cleanup operation takes place, you're likely to find strange errors. The following - configuration will place HBase's runtime data in a _tmp_ directory found inside the extracted - _hbase-_ directory, where it will be safe from this periodic cleanup. -+ -Open _conf/hbase-site.xml_ and paste the `` tags between the empty `` -tags. -+ -.Example _hbase-site.xml_ for Standalone HBase -==== -[source,xml] ----- - - - hbase.tmp.dir - tmp - - ----- -==== -+ -You do not need to create the HBase _tmp_ directory; HBase will do this for you. -+ -NOTE: When unconfigured, HBase uses <> as a starting point for many -important configurations. Notable among them are <>, the path under -which HBase stores its data. You can specify values for this configuration directly, as you'll see -in the subsequent sections. 
-+ -NOTE: In this example, HBase is running on Hadoop's `LocalFileSystem`. That abstraction doesn't -provide the durability promises that HBase needs to operate safely. This is most likely acceptable -for local development and testing use cases. It is not appropriate for production deployments; -eventually you will lose data. Instead, ensure your production deployment sets -<> to a durable `FileSystem` implementation. - . The _bin/start-hbase.sh_ script is provided as a convenient way to start HBase. Issue the command, and if all goes well, a message is logged to standard output showing that HBase started successfully. You can use the `jps` command to verify that you have one running process called `HMaster`. In standalone mode HBase runs all daemons within this single JVM, i.e. the HMaster, a single HRegionServer, and the ZooKeeper daemon. Go to _http://localhost:16010_ to view the HBase Web UI. -+ -NOTE: Java needs to be installed and available. -If you get an error indicating that Java is not installed, -but it is on your system, perhaps in a non-standard location, -edit the _conf/hbase-env.sh_ file and modify the `JAVA_HOME` -setting to point to the directory that contains _bin/java_ on your system. [[shell_exercises]] @@ -288,23 +242,28 @@ The above has shown you how to start and stop a standalone instance of HBase. In the next sections we give a quick overview of other modes of hbase deploy. [[quickstart_pseudo]] -=== Pseudo-Distributed Local Install - -After working your way through the <> using standalone mode, you can -re-configure HBase to run in pseudo-distributed mode. Pseudo-distributed mode means that HBase -still runs completely on a single host, but each HBase daemon (HMaster, HRegionServer, and -ZooKeeper) runs as a separate process. Previously in <>, all these -daemons ran in a single jvm process, and your data was stored under -<>. In this walk-through, your data will be stored in in HDFS -instead, assuming you have HDFS available. 
This is optional; you can skip the HDFS configuration -to continue storing your data in the local filesystem. +=== Pseudo-Distributed for Local Testing + +After working your way through <> standalone mode, +you can re-configure HBase to run in pseudo-distributed mode. +Pseudo-distributed mode means that HBase still runs completely on a single host, +but each HBase daemon (HMaster, HRegionServer, and ZooKeeper) runs as a separate process: +in standalone mode all daemons ran in one jvm process/instance. +By default, unless you configure the `hbase.rootdir` property as described in +<>, your data is still stored in _/tmp/_. +In this walk-through, we store your data in HDFS instead, assuming you have HDFS available. +You can skip the HDFS configuration to continue storing your data in the local filesystem. .Hadoop Configuration -NOTE: This procedure assumes that you have configured Hadoop and HDFS on your local system and/or a -remote system, and that they are running and available. It also assumes you are using Hadoop 2. +[NOTE] +==== +This procedure assumes that you have configured Hadoop and HDFS on your local system and/or a remote +system, and that they are running and available. It also assumes you are using Hadoop 2. The guide on link:https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html[Setting up a Single Node Cluster] in the Hadoop documentation is a good starting point. +==== + . Stop HBase if it is running. + @@ -325,7 +284,7 @@ First, add the following property which directs HBase to run in distributed mode ---- + -Next, add a configuration for `hbase.rootdir` so that it points to the address of your HDFS instance, using the `hdfs:////` URI syntax. +Next, add a configuration for `hbase.rootdir`, pointing to the address of your HDFS instance, using the `hdfs:////` URI syntax. In this example, HDFS is running on the localhost at port 8020. 
+ [source,xml] @@ -337,10 +296,11 @@ In this example, HDFS is running on the localhost at port 8020. ---- + -You do not need to create the directory in HDFS; HBase will do this for you. -If you create the directory, HBase will attempt to do a migration, which is not what you want. +You do not need to create the directory in HDFS. +HBase will do this for you. If you create the directory, HBase will attempt to do a migration, which is not what you want. + -Finally, remove the configuration for `hbase.tmp.dir`. +Finally, remove the existing configuration for `hbase.tmp.dir` and `hbase.unsafe.stream.capability.enforce`. + . Start HBase. + Use the _bin/start-hbase.sh_ command to start HBase. @@ -426,7 +386,7 @@ You can stop HBase the same way as in the <> procedure, u [[quickstart_fully_distributed]] -=== Advanced - Fully Distributed +=== Fully Distributed for Production In reality, you need a fully-distributed configuration to fully test HBase and to use it in real-world scenarios. In a distributed configuration, the cluster contains multiple nodes, each of which runs one or more HBase daemon.