diff --git a/docs/_includes/themes/zeppelin/_navigation.html b/docs/_includes/themes/zeppelin/_navigation.html index 6c09e2b4bf4..3cb5e2d18fd 100644 --- a/docs/_includes/themes/zeppelin/_navigation.html +++ b/docs/_includes/themes/zeppelin/_navigation.html @@ -32,7 +32,6 @@
  • Customize Zeppelin Homepage
  • More
  • -
  • Zeppelin on Vagrant VM
  • Upgrade Zeppelin Version
  • @@ -102,6 +101,10 @@
  • Notebook Authorization
  • Data Source Authorization
  • +
  • Advanced
  • +
  • Zeppelin on Vagrant VM
  • +
  • Zeppelin on Spark Cluster Mode (Standalone)
  • +
  • Contribute
  • Writing Zeppelin Interpreter
  • Writing Zeppelin Application (Experimental)
  • diff --git a/docs/assets/themes/zeppelin/img/docs-img/spark_ui.png b/docs/assets/themes/zeppelin/img/docs-img/spark_ui.png new file mode 100644 index 00000000000..ca91cf02432 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/spark_ui.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/standalone_conf.png b/docs/assets/themes/zeppelin/img/docs-img/standalone_conf.png new file mode 100644 index 00000000000..908fc84fbf7 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/standalone_conf.png differ diff --git a/docs/index.md b/docs/index.md index 141e7f6aeef..399393c0ac5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -133,7 +133,6 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor * [Publish your Paragraph](./manual/publish.html) results into your external website * [Customize Zeppelin Homepage](./manual/notebookashomepage.html) with one of your notebooks * More - * [Apache Zeppelin on Vagrant VM](./install/virtual_machine.html): a guide for installing Apache Zeppelin on Vagrant virtual machine * [Upgrade Apache Zeppelin Version](./install/upgrade.html): a manual procedure of upgrading Apache Zeppelin version ####Interpreter @@ -168,6 +167,9 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor * [Shiro Authentication](./security/shiroauthentication.html) * [Notebook Authorization](./security/notebook_authorization.html) * [Data Source Authorization](./security/datasource_authorization.html) +* Advanced + * [Apache Zeppelin on Vagrant VM](./install/virtual_machine.html) + * [Zeppelin on Spark Cluster Mode (Standalone via Docker)](./install/spark_cluster_mode.html#spark-standalone-mode) * Contribute * [Writing Zeppelin Interpreter](./development/writingzeppelininterpreter.html) * [Writing Zeppelin Application (Experimental)](./development/writingzeppelinapplication.html) diff --git a/docs/install/spark_cluster_mode.md 
b/docs/install/spark_cluster_mode.md new file mode 100644 index 00000000000..d2517bd2498 --- /dev/null +++ b/docs/install/spark_cluster_mode.md @@ -0,0 +1,74 @@ +--- +layout: page +title: "Apache Zeppelin on Spark cluster mode" +description: "" +group: install +--- + +{% include JB/setup %} + +# Apache Zeppelin on Spark Cluster Mode + +
    + +## Overview +[Apache Spark](http://spark.apache.org/) currently supports three cluster manager types ([Standalone](http://spark.apache.org/docs/latest/spark-standalone.html), [Apache Mesos](http://spark.apache.org/docs/latest/running-on-mesos.html) and [Hadoop YARN](http://spark.apache.org/docs/latest/running-on-yarn.html)). +This document explains how to build and configure an environment for each of these 3 types of Spark cluster manager with Apache Zeppelin using [Docker](https://www.docker.com/) scripts. +You need to [install Docker](https://docs.docker.com/engine/installation/) on your machine first. + +## Spark standalone mode +[Spark standalone](http://spark.apache.org/docs/latest/spark-standalone.html) is a simple cluster manager included with Spark that makes it easy to set up a cluster. +You can set up a Spark standalone environment with the steps below. + +> **Note :** Since Apache Zeppelin and Spark use the same `8080` port for their web UIs, you might need to change `zeppelin.server.port` in `conf/zeppelin-site.xml`. + +### 1. Build Docker file +You can find the Docker script files under `scripts/docker/spark-cluster-managers`. + +``` +cd $ZEPPELIN_HOME/scripts/docker/spark-cluster-managers/spark_standalone +docker build -t "spark_standalone" . +``` + +### 2. Run docker + +``` +docker run -it \ +-p 8080:8080 \ +-p 7077:7077 \ +-p 8888:8888 \ +-p 8081:8081 \ +-h sparkmaster \ +--name spark_standalone \ +spark_standalone bash; +``` + +### 3. Configure Spark interpreter in Zeppelin +Set the Spark master to `spark://localhost:7077` on the Zeppelin **Interpreters** setting page. + + + +### 4. Run Zeppelin with Spark interpreter +After running a single paragraph with the Spark interpreter in Zeppelin, browse to `http://localhost:8080` and check whether the Spark cluster is running properly. + + + +You can also verify that Spark is running in the Docker container with the command below. 
+ +``` +ps -ef | grep spark +``` + + diff --git a/scripts/docker/spark-cluster-managers/spark_standalone/Dockerfile b/scripts/docker/spark-cluster-managers/spark_standalone/Dockerfile new file mode 100644 index 00000000000..a7bae23b784 --- /dev/null +++ b/scripts/docker/spark-cluster-managers/spark_standalone/Dockerfile @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+FROM centos:centos6
+MAINTAINER hsshim@nflabs.com
+
+ENV SPARK_PROFILE 1.6
+ENV SPARK_VERSION 1.6.2
+ENV HADOOP_PROFILE 2.3
+ENV SPARK_HOME /usr/local/spark
+
+# Update the image with the latest packages ('&&' so a failed update fails the build)
+RUN yum update -y && yum clean all
+
+# Get utils
+RUN yum install -y \
+wget \
+tar \
+curl \
+&& \
+yum clean all
+
+# Remove old jdk ('-y' because docker build cannot answer yum's confirmation prompt)
+RUN yum remove -y java; yum remove -y jdk
+
+# install jdk7
+RUN yum install -y java-1.7.0-openjdk-devel && yum clean all
+ENV JAVA_HOME /usr/lib/jvm/java
+ENV PATH $PATH:$JAVA_HOME/bin
+
+# install spark from the permanent Apache archive; -f makes curl fail on HTTP errors
+RUN curl -fsSL http://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_PROFILE.tgz | tar -xz -C /usr/local/
+RUN cd /usr/local && ln -s spark-$SPARK_VERSION-bin-hadoop$HADOOP_PROFILE spark
+
+# update boot script (owned by root, executable by root only)
+COPY entrypoint.sh /etc/entrypoint.sh
+RUN chown root:root /etc/entrypoint.sh && \
+chmod 700 /etc/entrypoint.sh
+
+# spark ports published by this image
+EXPOSE 8080 7077 8888 8081
+
+ENTRYPOINT ["/etc/entrypoint.sh"]
diff --git a/scripts/docker/spark-cluster-managers/spark_standalone/entrypoint.sh b/scripts/docker/spark-cluster-managers/spark_standalone/entrypoint.sh
new file mode 100755
index 00000000000..f4fded0dc9e
--- /dev/null
+++ b/scripts/docker/spark-cluster-managers/spark_standalone/entrypoint.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export SPARK_MASTER_PORT=7077
+
+# run spark: start a master, then a worker registered with it; stop early if the sbin dir is missing
+cd "${SPARK_HOME:-/usr/local/spark}/sbin" || { echo "Spark sbin directory not found" >&2; exit 1; }
+./start-master.sh
+./start-slave.sh "spark://$(hostname):${SPARK_MASTER_PORT}"
+
+CMD=${1:-"exit 0"}  # first argument selects the mode; it is only compared against "-d" below
+if [[ "$CMD" == "-d" ]];
+then
+  service sshd stop  # NOTE(review): assumes sshd is installed in the image - confirm
+  /usr/sbin/sshd -D -d  # sshd in the foreground; presumably used to keep the container running
+else
+  /bin/bash -c "$*"  # run all arguments as one shell command line (empty when no arguments given)
+fi