diff --git a/.appveyor.yml b/.appveyor.yml
new file mode 100644
index 00000000000..e14cc62a08e
--- /dev/null
+++ b/.appveyor.yml
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+version: '1.0.0-dev.{build}'
+
+shallow_clone: true
+
+build: off
+
+os:
+  - Visual Studio 2015
+
+install:
+  - echo "Install"
+
+build_script:
+  - echo "Build"
diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE
index 09c1b3bfd52..526b4147827 100644
--- a/.github/PULL_REQUEST_TEMPLATE
+++ b/.github/PULL_REQUEST_TEMPLATE
@@ -1,6 +1,6 @@
 ### What is this PR for?
 A few sentences describing the overall goals of the pull request's commits.
-First time? Check out the contributing guide - https://github.com/apache/zeppelin/blob/master/CONTRIBUTING.md
+First time? Check out the contributing guide - https://zeppelin.apache.org/contribution/contributions.html
 
 ### What type of PR is it?
diff --git a/.gitignore b/.gitignore
index f60b256bd93..3f59c4e9b92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *.class
+*.pyc
 
 # Package Files #
 *.jar
@@ -6,33 +7,36 @@
 *.ear
 
 # interpreter
-/interpreter/
+/interpreter/*
+!/interpreter/lib
 
 # interpreter temp files
-spark/derby.log
+derby.log
 spark/metastore_db
 spark-1.*-bin-hadoop*
 .spark-dist
-zeppelin-server/derby.log
 lens/lens-cli-hist.log
+
 # conf file
 conf/zeppelin-env.sh
 conf/zeppelin-env.cmd
 conf/zeppelin-site.xml
+conf/shiro.ini
 conf/keystore
 conf/truststore
 conf/interpreter.json
 conf/notebook-authorization.json
 conf/shiro.ini
 conf/credentials.json
+conf/helium.json
 
 # other generated files
 spark/dependency-reduced-pom.xml
 reports
 
-#webapp
+# webapp
 zeppelin-web/node_modules
 zeppelin-web/dist
 zeppelin-web/.tmp
@@ -40,17 +44,25 @@
 zeppelin-web/src/fonts/Roboto*
 zeppelin-web/src/fonts/Source-Code-Pro*
 zeppelin-web/src/fonts/Patua-One*
 zeppelin-web/.sass-cache
+zeppelin-web/npm-debug.log
+zeppelin-web/yarn-error.log
 zeppelin-web/bower_components
+zeppelin-web/yarn.lock
 **nbproject/
 **node/
 
-#R
+# R
 /r/lib/
+.Rhistory
+/R/
+
+# scio
+.bigquery/
 
 # project level
 /logs/
 /run/
-/metastore_db/
+**/metastore_db/
 /*.log
 /jobs/
 /zan-repo/
@@ -83,7 +95,7 @@
 Thumbs.db
 .project
 .settings/
 
-#intelliJ IDEA project files
+# intelliJ IDEA project files
 .idea/
 *.iml
@@ -91,6 +103,9 @@ Thumbs.db
 target/
 **/target/
 
+# maven flattened pom files
+**/.flattened-pom.xml
+
 # Generated by Jekyll
 docs/_site/
@@ -104,3 +119,9 @@ tramp
 .\#*
 *.swp
 **/dependency-reduced-pom.xml
+
+# Generated by zeppelin-examples
+/helium
+
+# tmp files
+/tmp/
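A note on the `.gitignore` change above: the old `/interpreter/` rule ignored the directory itself, which makes a later `!` re-include impossible; the new pair ignores the directory's *contents* (`/interpreter/*`) so that `!/interpreter/lib` can bring one subdirectory back. A quick local sanity check, using hypothetical file names (not files from the repository):

```sh
# Illustrative paths only, to exercise the new rules with git check-ignore.
mkdir -p interpreter/spark interpreter/lib
touch interpreter/spark/dep.jar interpreter/lib/keep.jar
git check-ignore -v interpreter/spark/dep.jar   # reports the /interpreter/* rule
git check-ignore -v interpreter/lib/keep.jar    # no output, exit 1: re-included
```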
diff --git a/.travis.yml b/.travis.yml
index 680a9f59d6e..d6887ef8762 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,9 +18,14 @@ language: java
 sudo: false
 
 cache:
+  apt: true
   directories:
     - .spark-dist
-    - ${HOME}/.m2/repository/.cache/maven-download-plugin
+    - ${HOME}/.m2
+    - ${HOME}/R
+    - zeppelin-web/node
+    - zeppelin-web/node_modules
+    - zeppelin-web/bower_components
 
 addons:
   apt:
@@ -28,80 +33,110 @@ addons:
     - r-packages-precise
     packages:
     - r-base-dev
-    - r-cran-evaluate
-    - r-cran-base64enc
+
+env:
+  global:
+    # Interpreters not required by the zeppelin-server integration tests
+    - INTERPRETERS='!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!python,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy'
 
 matrix:
   include:
-    # Test all modules with spark 2.0.0 and scala 2.11
+    # Test License compliance using RAT tool
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
-
-    # Test all modules with scala 2.10
+      env: SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Prat" BUILD_FLAG="clean" TEST_FLAG="org.apache.rat:apache-rat-plugin:check" TEST_PROJECTS=""
+
+    # Test core modules
+    #
+    # Several tests were excluded from this configuration due to the following issues:
+    # HeliumBundleFactoryTest - https://issues.apache.org/jira/browse/ZEPPELIN-2469
+    # HeliumApplicationFactoryTest - https://issues.apache.org/jira/browse/ZEPPELIN-2470
+    # NotebookTest - https://issues.apache.org/jira/browse/ZEPPELIN-2471
+    # ZeppelinRestApiTest - https://issues.apache.org/jira/browse/ZEPPELIN-2473
+    # After these issues are fixed, the tests need to be re-enabled by removing them from the "-Dtests.to.exclude" property
    - jdk: "oraclejdk7"
-      env: SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pscala-2.10" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
+      env: SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_PROJECTS="-Dtests.to.exclude=**/ZeppelinSparkClusterTest.java,**/org.apache.zeppelin.spark.*,**/HeliumBundleFactoryTest.java,**/HeliumApplicationFactoryTest.java,**/NotebookTest.java,**/ZeppelinRestApiTest.java -DfailIfNoTests=false"
 
-    # Test all modules with scala 2.11
+    # Test selenium with spark module for 1.6.3
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.11" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
+      env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop-2.6 -Phelium-dev -Pexamples" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
 
-    # Test spark module for 1.5.2
+    # Test interpreter modules
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.10" SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
+      env: SCALA_VER="2.10" PROFILE="-Pscalding" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl $(echo .,zeppelin-interpreter,${INTERPRETERS} | sed 's/!//g')" TEST_PROJECTS=""
 
-    # Test spark module for 1.4.1
+    # Test spark module for 2.1.0 with scala 2.11, livy
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.10" SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
+      env: SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.1 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,livy" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false"
 
-    # Test spark module for 1.3.1
+    # Test spark module for 2.0.2 with scala 2.11
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.10" SPARK_VER="1.3.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.3 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
+      env: SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.0 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
 
-    # Test spark module for 1.2.2
+    # Test spark module for 1.6.3 with scala 2.10
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.10" SPARK_VER="1.2.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.2 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
+      env: SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop-2.6 -Pscala-2.10" SPARKR="true" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
 
-    # Test spark module for 1.1.1
+    # Test spark module for 1.6.3 with scala 2.11
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.10" SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
+      env: SCALA_VER="2.11" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
 
-    # Test selenium with spark module for 1.6.1
-    - jdk: "oraclejdk7"
-      env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
+    # Test python/pyspark with python 2, livy 0.2
+    - sudo: required
+      jdk: "oraclejdk7"
+      env: PYTHON="2" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.6" LIVY_VER="0.2.0" PROFILE="-Pspark-1.6 -Phadoop-2.6 -Plivy-0.2" BUILD_FLAG="package -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
+
+    # Test python/pyspark with python 3, livy 0.3
+    - sudo: required
+      jdk: "oraclejdk7"
+      env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.6" LIVY_VER="0.3.0" PROFILE="-Pspark-2.0 -Phadoop-2.6 -Pscala-2.11 -Plivy-0.3" BUILD_FLAG="package -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
 
 before_install:
-  - "ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin"
-  - mkdir -p ~/R
-  - echo 'R_LIBS=~/R' > ~/.Renviron
-  - R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org', lib='~/R')"
-  - export R_LIBS='~/R'
+  # check files included in commit range, clear bower_components if a bower.json file has changed.
+  # bower cache clearing can also be forced by putting "bower clear" or "clear bower" in a commit message
+  - changedfiles=$(git diff --name-only $TRAVIS_COMMIT_RANGE 2>/dev/null) || changedfiles=""
+  - echo $changedfiles
+  - hasbowerchanged=$(echo $changedfiles | grep -c "bower.json" || true);
+  - gitlog=$(git log $TRAVIS_COMMIT_RANGE 2>/dev/null) || gitlog=""
+  - clearcache=$(echo $gitlog | grep -c -E "clear bower|bower clear" || true)
+  - if [ "$hasbowerchanged" -gt 0 ] || [ "$clearcache" -gt 0 ]; then echo "Clearing bower_components cache"; rm -r zeppelin-web/bower_components; npm cache clear; else echo "Using cached bower_components."; fi
+  - echo "MAVEN_OPTS='-Xms1024M -Xmx2048M -XX:MaxPermSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.defaultLogLevel=warn'" >> ~/.mavenrc
+  - ./testing/install_external_dependencies.sh
+  - ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin || true
+  - ls .node_modules && cp -r .node_modules zeppelin-web/node_modules || echo "node_modules are not cached"
   - "/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1600x1024x16"
   - ./dev/change_scala_version.sh $SCALA_VER
+  - source ~/.environ
 
 install:
-  - mvn $BUILD_FLAG $PROFILE -B
+  - mvn $BUILD_FLAG $MODULES $PROFILE -B
 
 before_script:
-  - travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER
-  - ./testing/startSparkCluster.sh $SPARK_VER $HADOOP_VER
+  - if [[ -n $SPARK_VER ]]; then travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER; fi
+  - if [[ -n $LIVY_VER ]]; then ./testing/downloadLivy.sh $LIVY_VER; fi
+  - if [[ -n $LIVY_VER ]]; then export LIVY_HOME=`pwd`/livy-server-$LIVY_VER; fi
+  - if [[ -n $LIVY_VER ]]; then export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER; fi
   - echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh
+  - echo "export ZEPPELIN_HELIUM_REGISTRY=helium" >> conf/zeppelin-env.sh
   - tail conf/zeppelin-env.sh
 
 script:
-  - mvn $TEST_FLAG $PROFILE -B $TEST_PROJECTS
-
+  - mvn $TEST_FLAG $MODULES $PROFILE -B $TEST_PROJECTS
 
 after_success:
   - echo "Travis exited with ${TRAVIS_TEST_RESULT}"
 
 after_failure:
   - echo "Travis exited with ${TRAVIS_TEST_RESULT}"
-  - cat target/rat.txt
-  - cat zeppelin-server/target/rat.txt
+  - find . -name rat.txt | xargs cat
   - cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.log
   - cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.out
   - cat zeppelin-web/npm-debug.log
   - cat spark-*/logs/*
-
-after_script:
-  - ./testing/stopSparkCluster.sh $SPARK_VER $HADOOP_VER
+  - cat livy/target/tmp/*/output.log
+  - ls -R livy/target/tmp/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*
+  - cat livy/target/tmp/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*/*/*/stdout
+  - cat livy/target/tmp/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*/*/*/stderr
+  - cat livy/target/tmp/livy-int-test/*/output.log
+  - ls -R livy/target/tmp/livy-int-test/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*
+  - cat livy/target/tmp/livy-int-test/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*/*/*/stdout
+  - cat livy/target/tmp/livy-int-test/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*/*/*/stderr
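The `INTERPRETERS` variable in the new `.travis.yml` does double duty, which is easy to miss: Maven (3.2.1+) treats a leading `!` in `-pl` as a module *exclusion*, and the interpreter-only job inverts the very same list by deleting the `!` characters with `sed`. A minimal sketch of the trick, with the list abridged:

```sh
# Abridged sketch of the INTERPRETERS trick used in .travis.yml above.
INTERPRETERS='!hbase,!pig,!jdbc'

# Server-focused jobs: the '!' prefixes EXCLUDE these modules from the reactor.
mvn verify -pl "${INTERPRETERS}" -DfailIfNoTests=false

# Interpreter job: stripping the '!' turns the same list into an include list.
mvn test -pl "$(echo .,zeppelin-interpreter,${INTERPRETERS} | sed 's/!//g')"
```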
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
deleted file mode 100644
index 906c642dde6..00000000000
--- a/CONTRIBUTING.md
+++ /dev/null
@@ -1,228 +0,0 @@
-# How to contribute
-
-**Zeppelin** is [Apache2 License](https://github.com/apache/zeppelin/blob/master/CONTRIBUTING.md) Software.
-Contributing to Zeppelin (Source code, Documents, Image, Website) means you agree to the Apache2 License.
-
-1. Make sure your issue is not already in the [Jira issue tracker](https://issues.apache.org/jira/browse/ZEPPELIN)
-2. If not, create a ticket describing the change you're proposing in the [Jira issue tracker](https://issues.apache.org/jira/browse/ZEPPELIN)
-3. Contribute your patch via Pull Request.
-
-Before you start, please read the [Code of Conduct](http://www.apache.org/foundation/policies/conduct.html) carefully, familiarize yourself with it and refer to it whenever you need it.
-
-For those of you who are not familiar with Apache project, understanding [How it works](http://www.apache.org/foundation/how-it-works.html) would be quite helpful.
-
-## Creating a Pull Request
-In order to make the review process easier, please follow this template when making a Pull Request:
-
-```
-### What is this PR for?
-A few sentences describing the overall goals of the pull request's commits.
-First time? Check out the contributing guide - https://github.com/apache/zeppelin/blob/master/CONTRIBUTING.md
-
-### What type of PR is it?
-[Bug Fix | Improvement | Feature | Documentation | Hot Fix | Refactoring]
-
-### Todos
-* [ ] - Task
-
-### What is the Jira issue?
-* Open an issue on Jira https://issues.apache.org/jira/browse/ZEPPELIN/
-* Put link here, and add [ZEPPELIN-*Jira number*] in PR title, eg. [ZEPPELIN-533]
-
-### How should this be tested?
-Outline the steps to test the PR here.
-
-### Screenshots (if appropriate)
-
-### Questions:
-* Does the licenses files need update?
-* Is there breaking changes for older versions?
-* Does this needs documentation?
-```
-
-## Testing a Pull Request
-You can also test and review a particular Pull Request. Here are two useful ways.
-
-* Using a utility provided from Zeppelin.
-
-  ```
-  dev/test_zeppelin_pr.py [# of PR]
-  ```
-
-  For example, if you want to test `#513`, then the command will be:
-
-  ```
-  dev/test_zeppelin_pr.py 513
-  ```
-
-* Another way is using [github/hub](https://github.com/github/hub).
-
-  ```
-  hub checkout https://github.com/apache/zeppelin/pull/[# of PR]
-  ```
-
-The above two methods will help you test and review Pull Requests.
-
-## Source Control Workflow
-Zeppelin follows [Fork & Pull] (https://github.com/sevntu-checkstyle/sevntu.checkstyle/wiki/Development-workflow-with-Git:-Fork,-Branching,-Commits,-and-Pull-Request) model.
-
-## The Review Process
-
-When a Pull Request is submitted, it is being merged or rejected by following review process.
-
-* Anybody can be a reviewer and may comment on the change and suggest modifications.
-* Reviewer can indicate that a patch looks suitable for merging with a comment such as: "Looks good", "LGTM", "+1".
-* At least one indication of suitable for merging (e.g. "LGTM") from committer is required to be merged.
-* Pull request is open for 1 or 2 days for potential additional review, unless it's got enough indication of suitable for merging.
-* Committer can initiate lazy consensus ("Merge if there is no more discussion") and the code can be merged after certain time (normally 24 hours) when there is no review exists.
-* Contributor can ping reviewers (including committer) by commenting 'Ready to review' or suitable indication.
-
-## Becoming a Committer
-
-The PPMC adds new committers from the active contributors, based on their contribution to Zeppelin. The qualifications for new committers include:
-
-1. Sustained contributions: Committers should have a history of constant contributions to Zeppelin.
-2. Quality of contributions: Committers more than any other community member should submit simple, well-tested, and well-designed patches.
-3. Community involvement: Committers should have a constructive and friendly attitude in all community interactions. They should also be active on the dev, user list and reviewing patches. Also help new contributors and users.
-
-
-## Setting up
-Here are some things you will need to build and test Zeppelin.
-
-### Software Configuration Management (SCM)
-
-Zeppelin uses Git for its SCM system. `http://git.apache.org/zeppelin.git` you'll need git client installed in your development machine.
-For write access, `https://git-wip-us.apache.org/repos/asf/zeppelin.git`
-
-### Integrated Development Environment (IDE)
-
-You are free to use whatever IDE you prefer, or your favorite command line editor.
-
-### Project Structure
-
-Zeppelin project is based on Maven. Maven works by convention & defines [directory structure] (https://maven.apache.org/guides/introduction/introduction-to-the-standard-directory-layout.html) for a project.
-The top-level pom.xml describes the basic project structure. Currently Zeppelin has the following modules.
-
-    zeppelin-interpreter
-    zeppelin-zengine
-    spark
-    markdown
-    angular
-    shell
-    flink
-    ignite
-    lens
-    cassandra
-    zeppelin-web
-    zeppelin-server
-    zeppelin-distribution
-
-### Web Project Contribution Guidelines
-If you plan on making a contribution to Zeppelin's WebApplication,
-please check [its own contribution guidelines](https://github.com/apache/zeppelin/blob/master/zeppelin-web/CONTRIBUTING.md)
-
-### Code convention
-We are following Google Code style:
-* [Java style](https://google.github.io/styleguide/javaguide.html)
-* [Shell style](https://google.github.io/styleguide/shell.xml)
-
-Check style report location are in `${submodule}/target/site/checkstyle.html`
-Test coverage report location are in `${submodule}/target/site/cobertura/index.html`
-
-#### Build Tools
-
-To build the code, install
- * Oracle Java 7
- * Apache Maven
-
-## Getting the source code
-First of all, you need the Zeppelin source code. The official location for Zeppelin is [http://git.apache.org/zeppelin.git](http://git.apache.org/zeppelin.git).
-
-### git access
-
-Get the source code on your development machine using git.
-
-```
-git clone git://git.apache.org/zeppelin.git zeppelin
-```
-
-You may also want to develop against a specific branch. For example, for branch-0.5.6
-
-```
-git clone -b branch-0.5.6 git://git.apache.org/zeppelin.git zeppelin
-```
-
-or with write access
-
-```
-git clone https://git-wip-us.apache.org/repos/asf/zeppelin.git
-```
-
-### Fork repository
-
-If you want not only build Zeppelin but also make change, then you need fork Zeppelin github mirror repository (https://github.com/apache/zeppelin) and make pull request.
-
-
-## Build
-
-```
-mvn install
-```
-
-To skip test
-
-```
-mvn install -DskipTests
-```
-
-To build with specific spark / hadoop version
-
-```
-mvn install -Phadoop-2.2 -Dhadoop.version=2.2.0 -Pspark-1.3 -Dspark.version=1.3.0
-```
-
-## Tests
-Each new File should have its own accompanying unit tests. Each new interpreter should have come with its tests.
-
-
-Zeppelin has 3 types of tests:
-
-  1. Unit Tests: The unit tests run as part of each package's build. E.g. SparkInterpeter Module's unit test is SparkInterpreterTest
-  2. Integration Tests: The integration tests run after all modules are build. The integration tests launch an instance of Zeppelin server. ZeppelinRestApiTest is an example integration test.
-  3. GUI integration tests: These tests validate the Zeppelin UI elements. These tests require a running Zeppelin server and launches a web browser to validate Notebook UI elements like Notes and their execution. See ZeppelinIT as an example.
-
-Currently the GUI integration tests are not run in the Maven and are only run in the CI environment when the pull request is submitted to github. Make sure to watch the [CI results] (https://travis-ci.org/apache/zeppelin/pull_requests) for your pull request.
-
-## Continuous Integration
-
-Zeppelin uses Travis for CI. In the project root there is .travis.yml that configures CI and [publishes CI results] (https://travis-ci.org/apache/zeppelin/builds)
-
-
-## Run Zeppelin server in development mode
-
-```
-cd zeppelin-server
-HADOOP_HOME=YOUR_HADOOP_HOME JAVA_HOME=YOUR_JAVA_HOME mvn exec:java -Dexec.mainClass="org.apache.zeppelin.server.ZeppelinServer" -Dexec.args=""
-```
-
-or use daemon script
-
-```
-bin/zeppelin-daemon start
-```
-
-
-Server will be run on http://localhost:8080
-
-## JIRA
-Zeppelin manages it's issues in Jira.
-[https://issues.apache.org/jira/browse/ZEPPELIN](https://issues.apache.org/jira/browse/ZEPPELIN)
-
-## Where to Start
-You can find issues for [beginner](https://issues.apache.org/jira/browse/ZEPPELIN-924?jql=project%20%3D%20ZEPPELIN%20and%20status%20%3D%20Open%20and%20labels%20in%20\(beginner%2C%20newbie\))
-
-## Stay involved
-Everyone is welcome to join our mailing list:
-
- * [users@zeppelin.apache.org](http://mail-archives.apache.org/mod_mbox/zeppelin-users/) is for usage questions, help, and announcements [ [subscribe](mailto:users-subscribe@zeppelin.apache.org?subject=send%20this%20email%20to%20subscribe), [unsubscribe](mailto:users-unsubscribe@zeppelin.apache.org?subject=send%20this%20email%20to%20unsubscribe), [archive](http://mail-archives.apache.org/mod_mbox/zeppelin-users/) ]
- * [dev@zeppelin.apache.org](http://mail-archives.apache.org/mod_mbox/zeppelin-users/) is for people who want to contribute code to Zeppelin. [ [subscribe](mailto:dev-subscribe@zeppelin.apache.org?subject=send%20this%20email%20to%20subscribe), [unsubscribe](mailto:dev-unsubscribe@zeppelin.apache.org?subject=send%20this%20email%20to%20unsubscribe), [archive](http://mail-archives.apache.org/mod_mbox/zeppelin-dev/) ]
- * [commits@zeppelin.apache.org](http://mail-archives.apache.org/mod_mbox/zeppelin-commits/) is for commit messages and patches to Zeppelin. [ [subscribe](mailto:commits-subscribe@zeppelin.apache.org?subject=send%20this%20email%20to%20subscribe), [unsubscribe](mailto:commits-unsubscribe@zeppelin.apache.org?subject=send%20this%20email%20to%20unsubscribe), [archive](http://mail-archives.apache.org/mod_mbox/zeppelin-commits/) ]
diff --git a/LICENSE b/LICENSE
index 8b209c53681..e206a6c2eb4 100644
--- a/LICENSE
+++ b/LICENSE
@@ -221,7 +221,8 @@ The following components are provided under the Open Font License. See project l
 The text of each license is also included at licenses/LICENSE-[project]-[version].txt.
 
     (OFL 1.1) Font Awesome v4.2.0 (http://fortawesome.github.io/Font-Awesome/) - http://scripts.sil.org/OFL
-
+    (OFL 1.1) Patua One Font (see licenses/LICENSE-patuaOne-font)
+    (OFL 1.1) Source Code Pro Font (see licenses/LICENSE-source_code_pro-font)
 
 ========================================================================
 MIT licenses
@@ -242,7 +243,8 @@ The following components are provided under the MIT-style license. See project l
 The text of each license is also included at licenses/LICENSE-[project]-[version].txt.
 
     (MIT Style) jekyll-table-of-contents (https://github.com/ghiculescu/jekyll-table-of-contents) - https://github.com/ghiculescu/jekyll-table-of-contents/blob/master/LICENSE.txt
-
+    (MIT Style) lunr.js (https://github.com/olivernn/lunr.js) - https://github.com/olivernn/lunr.js/blob/v0.7.1/LICENSE
+
 ========================================================================
 Apache licenses
 ========================================================================
@@ -252,6 +254,8 @@ The text of each license is also included at licenses/LICENSE-[project]-[version
 
     (Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
     (Apache 2.0) Software under ./bigquery/* was developed at Google (http://www.google.com/). Licensed under the Apache v2.0 License.
+    (Apache 2.0) Roboto Font (https://github.com/google/roboto/)
+    (Apache 2.0) Gson extra (https://github.com/DanySK/gson-extras)
 
 ========================================================================
 BSD 3-Clause licenses
@@ -268,7 +272,7 @@ The following components are provided under the BSD 3-Clause license. See file
 ========================================================================
 BSD 2-Clause licenses
 ========================================================================
-The following components are provided under the BSD 3-Clause license. See file headers and project links for details.
+The following components are provided under the BSD 2-Clause license. See file headers and project links for details.
 
     (BSD 2 Clause) portions of SQLLine (http://sqlline.sourceforge.net/) - http://sqlline.sourceforge.net/#license
         jdbc/src/main/java/org/apache/zeppelin/jdbc/SqlCompleter.java
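The contributing guide deleted above recommended `dev/test_zeppelin_pr.py` or the `hub` tool for trying out a pull request. For what it's worth, plain git can do the same through GitHub's read-only `pull/*/head` refs; PR number 513 is reused here from the removed example, purely as an illustration:

```sh
# Fetch PR #513 into a local branch and switch to it; needs only stock git.
git fetch https://github.com/apache/zeppelin pull/513/head:pr-513
git checkout pr-513
```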
diff --git a/README.md b/README.md
index 2d8e45e150e..e12d2aedb51 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
-#Zeppelin
+# Apache Zeppelin
 
 **Documentation:** [User Guide](http://zeppelin.apache.org/docs/latest/index.html)<br/>
 **Mailing Lists:** [User and Dev mailing list](http://zeppelin.apache.org/community.html)<br/>
-**Continuous Integration:** [![Build Status](https://secure.travis-ci.org/apache/zeppelin.png?branch=master)](https://travis-ci.org/apache/zeppelin)<br/>
-**Contributing:** [Contribution Guide](https://github.com/apache/zeppelin/blob/master/CONTRIBUTING.md)<br/>
+**Continuous Integration:** [![Build Status](https://travis-ci.org/apache/zeppelin.svg?branch=master)](https://travis-ci.org/apache/zeppelin)<br/>
+**Contributing:** [Contribution Guide](https://zeppelin.apache.org/contribution/contributions.html)<br/>
 **Issue Tracker:** [Jira](https://issues.apache.org/jira/browse/ZEPPELIN)<br/>
 **License:** [Apache 2.0](https://github.com/apache/zeppelin/blob/master/LICENSE)
@@ -17,300 +17,13 @@ Core feature:
 
 To know more about Zeppelin, visit our web site [http://zeppelin.apache.org](http://zeppelin.apache.org)
 
-## Requirements
- * Git
- * Java 1.7
- * Tested on Mac OSX, Ubuntu 14.X, CentOS 6.X, Windows 7 Pro SP1
- * Maven (if you want to build from the source code)
- * Node.js Package Manager (npm, downloaded by Maven during build phase)
 
 ## Getting Started
 
-### Before Build
-If you don't have requirements prepared, install it.
-(The installation method may vary according to your environment, example is for Ubuntu.)
+### Install binary package
+Please go to [install](http://zeppelin.apache.org/docs/snapshot/install/install.html) to install Apache Zeppelin from the binary package.
 
-```
-sudo apt-get update
-sudo apt-get install git
-sudo apt-get install openjdk-7-jdk
-sudo apt-get install npm
-sudo apt-get install libfontconfig
-```
+### Build from source
+Please check [Build from source](http://zeppelin.apache.org/docs/snapshot/install/build.html) to build Zeppelin from source.
 
-#### Proxy settings (optional)
-If you are behind a corporate Proxy with NTLM authentication you can use [Cntlm Authentication Proxy](http://cntlm.sourceforge.net/) .
-Before build start, run these commands from shell.
-```
-export http_proxy=http://localhost:3128
-export https_proxy=http://localhost:3128
-export HTTP_PROXY=http://localhost:3128
-export HTTPS_PROXY=http://localhost:3128
-npm config set proxy http://localhost:3128
-npm config set https-proxy http://localhost:3128
-npm config set registry "http://registry.npmjs.org/"
-npm config set strict-ssl false
-npm cache clean
-git config --global http.proxy http://localhost:3128
-git config --global https.proxy http://localhost:3128
-git config --global url."http://".insteadOf git://
-```
-
-After build is complete, run these commands to cleanup.
-```
-npm config rm proxy
-npm config rm https-proxy
-git config --global --unset http.proxy
-git config --global --unset https.proxy
-git config --global --unset url."http://".insteadOf
-```
-
-_Notes:_
- - If you are on Windows replace `export` with `set` to set env variables
 - - Replace `localhost:3128` with standard pattern `http://user:pwd@host:port`
 - - Git configuration is needed because Bower use it for fetching from GitHub
-
-#### Install maven
-```
-wget http://www.eu.apache.org/dist/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
-sudo tar -zxf apache-maven-3.3.3-bin.tar.gz -C /usr/local/
-sudo ln -s /usr/local/apache-maven-3.3.3/bin/mvn /usr/local/bin/mvn
-```
-
-_Notes:_
- - Ensure node is installed by running `node --version`
 - - Ensure maven is running version 3.1.x or higher with `mvn -version`
 - - Configure maven to use more memory than usual by `export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=1024m"`
-
-### Build
-If you want to build Zeppelin from the source, please first clone this repository, then:
-
-```
-mvn clean package -DskipTests [Options]
-```
-
-Each Interpreter requires different Options.
-
-
-#### Spark Interpreter
-
-To build with a specific Spark version, Hadoop version or specific features, define one or more of the following profiles and options:
-
-##### `-Pspark-[version]`
-
-Set spark major version
-
-Available profiles are
-
-```
--Pspark-2.0
--Pspark-1.6
--Pspark-1.5
--Pspark-1.4
--Pspark-1.3
--Pspark-1.2
--Pspark-1.1
--Pcassandra-spark-1.5
--Pcassandra-spark-1.4
--Pcassandra-spark-1.3
--Pcassandra-spark-1.2
--Pcassandra-spark-1.1
-```
-
-minor version can be adjusted by `-Dspark.version=x.x.x`
-
-
-##### `-Phadoop-[version]`
-
-set hadoop major version
-
-Available profiles are
-
-```
--Phadoop-0.23
--Phadoop-1
--Phadoop-2.2
--Phadoop-2.3
--Phadoop-2.4
--Phadoop-2.6
-```
-
-minor version can be adjusted by `-Dhadoop.version=x.x.x`
-
-##### `-Pscala-[version] (optional)`
-
-set scala version (default 2.10)
-Available profiles are
-
-```
--Pscala-2.10
--Pscala-2.11
-```
-
-##### `-Pyarn` (optional)
-
-enable YARN support for local mode
-> YARN for local mode is not supported for Spark v1.5.0 or higher. Set `SPARK_HOME` instead.
-
-##### `-Ppyspark` (optional)
-
-enable [PySpark](http://spark.apache.org/docs/latest/api/python/) support for local mode.
-
-##### `-Pr` (optional)
-
-enable [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration.
-
-##### `-Psparkr` (optional)
-
-another [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration as well as local mode support.
-
-##### `-Pvendor-repo` (optional)
-
-enable 3rd party vendor repository (cloudera)
-
-
-##### `-Pmapr[version]` (optional)
-
-For the MapR Hadoop Distribution, these profiles will handle the Hadoop version. As MapR allows different versions of Spark to be installed, you should specify which version of Spark is installed on the cluster by adding a Spark profile (`-Pspark-1.2`, `-Pspark-1.3`, etc.) as needed.
-The correct Maven artifacts can be found for every version of MapR at http://doc.mapr.com
-
-Available profiles are
-
-```
--Pmapr3
--Pmapr40
--Pmapr41
--Pmapr50
--Pmapr51
-```
-
-
-#### Example
-
-Here're some examples:
-
-```sh
-# build with spark-2.0, scala-2.11
-./dev/change_scala_version.sh 2.11
-mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pscala-2.11
-
-# build with spark-1.6, scala-2.10
-mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr
-
-# spark-cassandra integration
-mvn clean package -Pcassandra-spark-1.5 -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests
-
-# with CDH
-mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pvendor-repo -DskipTests
-
-# with MapR
-mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests
-```
-
-
-#### Ignite Interpreter
-
-```sh
-mvn clean package -Dignite.version=1.6.0 -DskipTests
-```
-
-#### Scalding Interpreter
-
-```sh
-mvn clean package -Pscalding -DskipTests
-```
-
-### Configure
-If you wish to configure Zeppelin option (like port number), configure the following files:
-
-```
-./conf/zeppelin-env.sh
-./conf/zeppelin-site.xml
-```
-
-(You can copy `./conf/zeppelin-env.sh.template` into `./conf/zeppelin-env.sh`.
-Same for `zeppelin-site.xml`.)
-
-
-#### Setting SPARK_HOME and HADOOP_HOME
-
-Without `SPARK_HOME` and `HADOOP_HOME`, Zeppelin uses embedded Spark and Hadoop binaries that you have specified with mvn build option.
-If you want to use system provided Spark and Hadoop, export `SPARK_HOME` and `HADOOP_HOME` in `zeppelin-env.sh`.
-You can use any supported version of spark without rebuilding Zeppelin.
-
-```sh
-# ./conf/zeppelin-env.sh
-export SPARK_HOME=...
-export HADOOP_HOME=...
-```
-
-#### External cluster configuration
-
-Mesos
-
-```sh
-# ./conf/zeppelin-env.sh
-export MASTER=mesos://...
-export ZEPPELIN_JAVA_OPTS="-Dspark.executor.uri=/path/to/spark-*.tgz" or SPARK_HOME="/path/to/spark_home"
-export MESOS_NATIVE_LIBRARY=/path/to/libmesos.so
-```
-
-If you set `SPARK_HOME`, you should deploy spark binary on the same location to all worker nodes. And if you set `spark.executor.uri`, every worker can read that file on its node.
-
-Yarn
-
-```sh
-# ./conf/zeppelin-env.sh
-export SPARK_HOME=/path/to/spark_dir
-```
-
-### Run
-
-```sh
-./bin/zeppelin-daemon.sh start
-```
-
-And browse [localhost:8080](localhost:8080) in your browser.
-
-
-For configuration details check __`./conf`__ subdirectory.
-
-### Building for Scala 2.11
-
-To produce a Zeppelin package compiled with Scala 2.11, use the -Pscala-2.11 profile:
-
-```
-./dev/change_scala_version.sh 2.11
-mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Pscala-2.11 -DskipTests clean install
-```
-
-### Package
-To package the final distribution including the compressed archive, run:
-
-```sh
-mvn clean package -Pbuild-distr
-```
-
-To build a distribution with specific profiles, run:
-
-```sh
-mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4 -Pyarn -Ppyspark
-```
-
-The profiles `-Pspark-1.5 -Phadoop-2.4 -Pyarn -Ppyspark` can be adjusted if you wish to build to a specific spark versions, or omit support such as `yarn`.
-
-The archive is generated under _`zeppelin-distribution/target`_ directory
-
-###Run end-to-end tests
-Zeppelin comes with a set of end-to-end acceptance tests driving headless selenium browser
-
-```sh
-# assumes zeppelin-server running on localhost:8080 (use -Durl=.. to override)
-mvn verify
-
-# or take care of starting/stoping zeppelin-server from packaged zeppelin-distribuion/target
-mvn verify -P using-packaged-distr
-```
-
-[![Analytics](https://ga-beacon.appspot.com/UA-45176241-4/apache/zeppelin/README.md?pixel)](https://github.com/igrigorik/ga-beacon)
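Since the detailed build walkthrough is removed from the README above (it now lives on the website), the old notes condense to a short pre-flight check. The sketch below simply restates them; the versions and profiles shown are the ones the removed text used, not current requirements:

```sh
# Pre-flight checks distilled from the removed README build notes.
node --version    # npm/node present (otherwise Maven downloads them during build)
mvn -version      # the old notes asked for Maven 3.1.x or higher
export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=1024m"   # more memory than default

# Example build line from the removed text (spark-2.0 with scala-2.11):
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pscala-2.11
```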
diff --git a/SECURITY-README.md b/SECURITY-README.md
index f9f6645ff5b..4ef7440db12 100644
--- a/SECURITY-README.md
+++ b/SECURITY-README.md
@@ -16,12 +16,7 @@ limitations under the License.
 
 To connect to Zeppelin, users will be asked to enter their credentials. Once logged in, a user has access to all notes, including other users' notes. This is a first step toward full security as implemented by this pull request (https://github.com/apache/zeppelin/pull/53).
 
-# Security setup
-1. Secure the HTTP channel: Comment the line "/** = anon" and uncomment the line "/** = authcBasic" in the file conf/shiro.ini. Read more about he shiro.ini file format at the following URL http://shiro.apache.org/configuration.html#Configuration-INISections.
-2. Secure the Websocket channel : Set to property "zeppelin.anonymous.allowed" to "false" in the file conf/zeppelin-site.xml. You can start by renaming conf/zeppelin-site.xml.template to conf/zeppelin-site.xml
-3. Start Zeppelin : bin/zeppelin.sh
-4. point your browser to http://localhost:8080
-5. Login using one of the user/password combinations defined in the conf/shiro.ini file.
+Please check [Shiro authentication in Apache Zeppelin](https://zeppelin.apache.org/docs/snapshot/security/shiroauthentication.html) on our official website for more detailed information (e.g. how to set up security, how to configure user groups and permissions, etc.).
 
 # Implementation notes
 ## Vocabulary
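For readers who hit this diff without the website handy, the deleted setup steps amounted to roughly the following. The `sed` line is an illustrative shorthand for the manual edit the old text described, not a command from the docs:

```sh
# 1. HTTP channel: replace the anonymous rule with basic auth in conf/shiro.ini
sed -i 's#^/\*\* = anon#/** = authcBasic#' conf/shiro.ini
# 2. Websocket channel: create zeppelin-site.xml and set
#    zeppelin.anonymous.allowed to false in it
cp conf/zeppelin-site.xml.template conf/zeppelin-site.xml
# 3. Start Zeppelin, browse to http://localhost:8080 and log in with a
#    user/password pair defined in conf/shiro.ini
bin/zeppelin-daemon.sh start
```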
diff --git a/STYLE.md b/STYLE.md
index 9a0242e07e3..8182301b5a5 100644
--- a/STYLE.md
+++ b/STYLE.md
@@ -7,7 +7,7 @@ app/styles/looknfeel
 Overall look and theme of the Zeppelin notebook page can be customized here.
 
 ### Code Syntax Highlighting
-There are two parts to code highlighting. First, Zeppelin uses the Ace Editor for its note paragraphs. Color style for this can be changed by setting theme on the editor instance. Second, Zeppelin's Markdown interpreter calls markdown4j to emit HTML, and such content may contain <pre><code> tags that can be consumed by Highlight.js.
+There are two parts to code highlighting. First, Zeppelin uses the Ace Editor for its note paragraphs. Color style for this can be changed by setting a theme on the editor instance. Second, Zeppelin's Markdown interpreter calls the pegdown parser to emit HTML, and such content may contain <pre><code> tags that can be consumed by Highlight.js.
 
 #### Theme on Ace Editor
 app/scripts/controllers/paragraph.js
@@ -16,7 +16,7 @@ Call setTheme on the editor with the theme path/name.
 [List of themes on GitHub](https://github.com/ajaxorg/ace/tree/master/lib/ace/theme)
 
 #### Style for Markdown Code Blocks
-Highlight.js parses and converts <pre><code> blocks from markdown4j into keywords and language syntax with proper styles. It also attempts to infer the best fitting language if it is not provided. The visual style can be changed by simply including the desired [stylesheet](https://github.com/components/highlightjs/tree/master/styles) into app/index.html. See the next section on build.
+Highlight.js parses and converts <pre><code> blocks from the pegdown parser into keywords and language syntax with proper styles. It also attempts to infer the best fitting language if it is not provided. The visual style can be changed by simply including the desired [stylesheet](https://github.com/components/highlightjs/tree/master/styles) into app/index.html. See the next section on build.
 
 Note that code block background color is overridden in app/styles/notebook.css (look for .paragraph .tableDisplay .hljs).
@@ -25,11 +25,11 @@ bower.json
 In the override section at the bottom, include the Highlightjs stylesheet (eg. styles/github.css)
 For the selected Ace Editor theme script, include it in the override section. (eg. src-noconflict/theme-github.js)
 (bower will automatically add the appropriate .js and .css in app/index.html)
-```
+```diff
       "src-noconflict/mode-sql.js",
       "src-noconflict/mode-markdown.js",
       "src-noconflict/keybinding-emacs.js",
-      "src-noconflict/ext-language_tools.js",
+      "src-noconflict/ext-language_tools.js",
+      "src-noconflict/theme-github.js"],
 
     "version": "1.1.8",
     "name": "ace-builds"
@@ -48,13 +48,13 @@ Highlight.js style - depends on the style, a few themes have jpg - if so, one mu
 
 ### Example - change Ace Editor theme to monokai
 app/scripts/controllers/paragraph.js
-```
+```diff
-  $scope.editor.setTheme('ace/theme/github');
+  $scope.editor.setTheme('ace/theme/monokai');
 ```
 
 bower.json
-```
+```diff
-  "src-noconflict/theme-github.js"],
+  "src-noconflict/theme-monokai.js"],
 ```
diff --git a/_tools/maven-4.0.0.xsd b/_tools/maven-4.0.0.xsd
new file mode 100644
index 00000000000..f3a36834a2c
--- /dev/null
+++ b/_tools/maven-4.0.0.xsd
@@ -0,0 +1,2484 @@
+[The 2,484 added lines are a verbatim copy of the standard Maven 4.0.0 POM schema (maven-4.0.0.xsd, as published at maven.apache.org/xsd/). The XML markup of this new file was lost during extraction, leaving only fragments of its embedded documentation strings, so the schema body is not reproduced here.]
+ The format is <code>&lt;name&gt;value&lt;/name&gt;</code>. + + + + + + + + + + + + 4.0.0+ + Default dependency information for projects that inherit from this one. The + dependencies in this section are not immediately resolved. Instead, when a POM derived + from this one declares a dependency described by a matching groupId and artifactId, the + version and other values from this section are used for that dependency if they were not + already specified. + + + + + 3.0.0+ + + + This element describes all of the dependencies associated with a + project. + These dependencies are used to construct a classpath for your + project during the build process. They are automatically downloaded from the + repositories defined in this project. + See <a href="http://maven.apache.org/guides/introduction/introduction-to-dependency-mechanism.html">the + dependency mechanism</a> for more information. + + + + + + + + + + + + 4.0.0+ + The lists of the remote repositories for discovering dependencies and + extensions. + + + + + + + + + + 4.0.0+ + The lists of the remote repositories for discovering plugins for builds and + reports. + + + + + + + + + + 4.0.0+ + + + <b>Deprecated</b>. Now ignored by Maven. + + + + + + + + + + + + 4.0.0+ + + + This element includes the specification of report plugins to use + to generate the reports on the Maven-generated site. + These reports will be run when a user executes <code>mvn site</code>. + All of the reports will be included in the navigation bar for browsing. + + + + + + + + + 3.0.0+ + Generic informations for a build. + + + + + 3.0.0+ + The default goal (or phase in Maven 2) to execute when none is specified for + the project. Note that in case of a multi-module build, only the default goal of the top-level + project is relevant, i.e. the default goals of child modules are ignored. Since Maven 3, + multiple goals/phases can be separated by whitespace. + + + + + 3.0.0+ + + This element describes all of the classpath resources such as properties + files associated with a project. These resources are often included in the final + package. + The default value is <code>src/main/resources</code>. + + + + + + + + + + + 4.0.0+ + + This element describes all of the classpath resources such as properties + files associated with a project's unit tests. + The default value is <code>src/test/resources</code>. + + + + + + + + + + + 4.0.0+ + + The directory where all files generated by the build are placed. + The default value is <code>target</code>. + + + + + + 4.0.0+ + + + The filename (excluding the extension, and with no path information) that + the produced artifact will be called. + The default value is <code>${artifactId}-${version}</code>. + + + + + + + 4.0.0+ + The list of filter properties files that are used when filtering is enabled. + + + + + + + + + + 4.0.0+ + Default plugin information to be made available for reference by projects + derived from this one. This plugin configuration will not be resolved or bound to the + lifecycle unless referenced. Any local configuration for a given plugin will override + the plugin's entire definition here. + + + + + 4.0.0+ + The list of plugins to use. + + + + + + + + + + + + 4.0.0+ + + + The <code>&lt;plugin&gt;</code> element contains informations required for a plugin. + + + + + + + 4.0.0+ + The group ID of the plugin in the repository. + + + + + 4.0.0+ + The artifact ID of the plugin in the repository. + + + + + 4.0.0+ + The version (or valid range of versions) of the plugin to be used. 
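The `dependencyManagement` semantics described above are easiest to see with a pair of POMs. In this hypothetical sketch (invented coordinates), the parent pins a version, and a child that declares only the matching groupId and artifactId inherits it:

```
<!-- parent POM: pins the version but resolves nothing by itself -->
<dependencyManagement>
  <dependencies>
    <dependency>
      <groupId>com.example</groupId>
      <artifactId>example-lib</artifactId>
      <version>1.2.3</version>
    </dependency>
  </dependencies>
</dependencyManagement>

<!-- child POM: the version is filled in from the parent's dependencyManagement -->
<dependencies>
  <dependency>
    <groupId>com.example</groupId>
    <artifactId>example-lib</artifactId>
  </dependency>
</dependencies>
```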
+ + + + + 4.0.0+ + + + Whether to load Maven extensions (such as packaging and type handlers) from + this plugin. For performance reasons, this should only be enabled when necessary. Note: While the type + of this field is <code>String</code> for technical reasons, the semantic type is actually + <code>Boolean</code>. Default value is <code>false</code>. + + + + + + + 4.0.0+ + Multiple specifications of a set of goals to execute during the build + lifecycle, each having (possibly) a different configuration. + + + + + + + + + + 4.0.0+ + Additional dependencies that this project needs to introduce to the plugin's + classloader. + + + + + + + + + + 4.0.0+ + + + <b>Deprecated</b>. Unused by Maven. + + + + + + + + + + + + 4.0.0+ + + + Whether any configuration should be propagated to child POMs. Note: While the type + of this field is <code>String</code> for technical reasons, the semantic type is actually + <code>Boolean</code>. Default value is <code>true</code>. + + + + + + + 0.0.0+ + + + <p>The configuration as DOM object.</p> + <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add + <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p> + <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code> + or <code>combine.self</code> attributes to the children of the configuration element:</p> + <ul> + <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li> + <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li> + </ul> + <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and + <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a> + for more information.</p> + + + + + + + + + + + + + + 3.0.0+ + + + The <code>&lt;dependency&gt;</code> element contains information about a dependency + of the project. + + + + + + + 3.0.0+ + + + The project group that produced the dependency, e.g. + <code>org.apache.maven</code>. + + + + + + + 3.0.0+ + + + The unique id for an artifact produced by the project group, e.g. + <code>maven-artifact</code>. + + + + + + + 3.0.0+ + + + The version of the dependency, e.g. <code>3.2.1</code>. In Maven 2, this can also be + specified as a range of versions. + + + + + + + 4.0.0+ + + + The type of dependency. While it + usually represents the extension on the filename of the dependency, + that is not always the case. A type can be mapped to a different + extension and a classifier. + The type often corresponds to the packaging used, though this is also + not always the case. + Some examples are <code>jar</code>, <code>war</code>, <code>ejb-client</code> + and <code>test-jar</code>: see <a href="../maven-core/artifact-handlers.html">default + artifact handlers</a> for a list. + New types can be defined by plugins that set + <code>extensions</code> to <code>true</code>, so this is not a complete list. + + + + + + + 4.0.0+ + + + The classifier of the dependency. It is appended to + the filename after the version. This allows: + <ul> + <li>refering to attached artifact, for example <code>sources</code> and <code>javadoc</code>: + see <a href="../maven-core/artifact-handlers.html">default artifact handlers</a> for a list,</li> + <li>distinguishing two artifacts + that belong to the same POM but were built differently. 
+ For example, <code>jdk14</code> and <code>jdk15</code>.</li> + </ul> + + + + + + + 4.0.0+ + + + The scope of the dependency - <code>compile</code>, <code>runtime</code>, + <code>test</code>, <code>system</code>, and <code>provided</code>. Used to + calculate the various classpaths used for compilation, testing, and so on. + It also assists in determining which artifacts to include in a distribution of + this project. For more information, see + <a href="http://maven.apache.org/guides/introduction/introduction-to-dependency-mechanism.html">the + dependency mechanism</a>. + + + + + + + 4.0.0+ + + + FOR SYSTEM SCOPE ONLY. Note that use of this property is <b>discouraged</b> + and may be replaced in later versions. This specifies the path on the filesystem + for this dependency. + Requires an absolute path for the value, not relative. + Use a property that gives the machine specific absolute path, + e.g. <code>${java.home}</code>. + + + + + + + 4.0.0+ + Lists a set of artifacts that should be excluded from this dependency's + artifact list when it comes to calculating transitive dependencies. + + + + + + + + + + 4.0.0+ + + + Indicates the dependency is optional for use of this library. While the + version of the dependency will be taken into account for dependency calculation if the + library is used elsewhere, it will not be passed on transitively. Note: While the type + of this field is <code>String</code> for technical reasons, the semantic type is actually + <code>Boolean</code>. Default value is <code>false</code>. + + + + + + + + + 4.0.0+ + + + The <code>&lt;exclusion&gt;</code> element contains informations required to exclude + an artifact to the project. + + + + + + + 4.0.0+ + The artifact ID of the project to exclude. + + + + + 4.0.0+ + The group ID of the project to exclude. + + + + + + + 4.0.0+ + + + The <code>&lt;execution&gt;</code> element contains informations required for the + execution of a plugin. + + + + + + + 4.0.0+ + The identifier of this execution for labelling the goals during the build, + and for matching executions to merge during inheritance and profile injection. + + + + + 4.0.0+ + The build lifecycle phase to bind the goals in this execution to. If omitted, + the goals will be bound to the default phase specified by the plugin. + + + + + 4.0.0+ + The goals to execute with the given configuration. + + + + + + + + + + 4.0.0+ + + + Whether any configuration should be propagated to child POMs. Note: While the type + of this field is <code>String</code> for technical reasons, the semantic type is actually + <code>Boolean</code>. Default value is <code>true</code>. 
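The dependency, exclusion, and execution elements above fit together as in this sketch. The artifact coordinates are invented; `maven-antrun-plugin` and its `run` goal are real and are used here only as an example of phase binding:

```
<dependency>
  <groupId>com.example</groupId>
  <artifactId>example-client</artifactId>
  <version>2.0</version>
  <scope>runtime</scope>
  <exclusions>
    <exclusion>
      <groupId>commons-logging</groupId>
      <artifactId>commons-logging</artifactId>
    </exclusion>
  </exclusions>
</dependency>

<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-antrun-plugin</artifactId>
  <executions>
    <execution>
      <id>package-time-tasks</id>   <!-- label used when merging during inheritance -->
      <phase>package</phase>        <!-- lifecycle phase the goals are bound to -->
      <goals>
        <goal>run</goal>
      </goals>
      <inherited>false</inherited>  <!-- do not propagate this execution to child POMs -->
    </execution>
  </executions>
</plugin>
```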
+ + + + + + + 0.0.0+ + + + <p>The configuration as DOM object.</p> + <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add + <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p> + <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code> + or <code>combine.self</code> attributes to the children of the configuration element:</p> + <ul> + <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li> + <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li> + </ul> + <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and + <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a> + for more information.</p> + + + + + + + + + + + + + + 3.0.0+ + This element describes all of the classpath resources associated with a project + or unit tests. + + + + + 3.0.0+ + + + Describe the resource target path. The path is relative to the target/classes + directory (i.e. <code>${project.build.outputDirectory}</code>). + For example, if you want that resource to appear in a specific package + (<code>org.apache.maven.messages</code>), you must specify this + element with this value: <code>org/apache/maven/messages</code>. + This is not required if you simply put the resources in that directory + structure at the source, however. + + + + + + + 3.0.0+ + + + Whether resources are filtered to replace tokens with parameterised values or not. + The values are taken from the <code>properties</code> element and from the + properties in the files listed in the <code>filters</code> element. Note: While the type + of this field is <code>String</code> for technical reasons, the semantic type is actually + <code>Boolean</code>. Default value is <code>false</code>. + + + + + + + 3.0.0+ + Describe the directory where the resources are stored. The path is relative + to the POM. + + + + + 3.0.0+ + + + A list of patterns to include, e.g. <code>**&#47;*.xml</code>. + + + + + + + + + + + + 3.0.0+ + + + A list of patterns to exclude, e.g. <code>**&#47;*.xml</code> + + + + + + + + + + + + + + 4.0.0+ + Section for management of default plugin information for use in a group of POMs. + + + + + + 4.0.0+ + The list of plugins to use. + + + + + + + + + + + + 4.0.0+ + Section for management of reports and their configuration. + + + + + 4.0.0+ + + + If true, then the default reports are not included in the site generation. + This includes the reports in the "Project Info" menu. Note: While the type + of this field is <code>String</code> for technical reasons, the semantic type is actually + <code>Boolean</code>. Default value is <code>false</code>. + + + + + + + 4.0.0+ + + + Where to store all of the generated reports. The default is + <code>${project.build.directory}/site</code>. + + + + + + + 4.0.0+ + The reporting plugins to use and their configuration. + + + + + + + + + + + + 4.0.0+ + + + The <code>&lt;plugin&gt;</code> element contains informations required for a report plugin. + + + + + + + 4.0.0+ + The group ID of the reporting plugin in the repository. + + + + + 4.0.0+ + The artifact ID of the reporting plugin in the repository. + + + + + 4.0.0+ + The version of the reporting plugin to be used. 
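For instance, the resource elements described above allow a filtered resource set like the following; the paths and patterns are illustrative only:

```
<build>
  <resources>
    <resource>
      <directory>src/main/resources</directory>
      <targetPath>org/example/messages</targetPath>  <!-- relative to target/classes -->
      <filtering>true</filtering>                    <!-- replace ${...} tokens -->
      <includes>
        <include>**/*.xml</include>
      </includes>
      <excludes>
        <exclude>**/*.draft.xml</exclude>
      </excludes>
    </resource>
  </resources>
</build>
```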
+ + + + + 4.0.0+ + + + Multiple specifications of a set of reports, each having (possibly) different + configuration. This is the reporting parallel to an <code>execution</code> in the build. + + + + + + + + + + + + 4.0.0+ + + + Whether any configuration should be propagated to child POMs. Note: While the type + of this field is <code>String</code> for technical reasons, the semantic type is actually + <code>Boolean</code>. Default value is <code>true</code>. + + + + + + + 0.0.0+ + + + <p>The configuration as DOM object.</p> + <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add + <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p> + <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code> + or <code>combine.self</code> attributes to the children of the configuration element:</p> + <ul> + <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li> + <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li> + </ul> + <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and + <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a> + for more information.</p> + + + + + + + + + + + + + + 4.0.0+ + Represents a set of reports and configuration to be used to generate them. + + + + + 0.0.0+ + The unique id for this report set, to be used during POM inheritance and profile injection + for merging of report sets. + + + + + + 4.0.0+ + The list of reports from this plugin which should be generated from this set. + + + + + + + + + + 4.0.0+ + + + Whether any configuration should be propagated to child POMs. Note: While the type + of this field is <code>String</code> for technical reasons, the semantic type is actually + <code>Boolean</code>. Default value is <code>true</code>. + + + + + + + 0.0.0+ + + + <p>The configuration as DOM object.</p> + <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add + <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p> + <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code> + or <code>combine.self</code> attributes to the children of the configuration element:</p> + <ul> + <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li> + <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li> + </ul> + <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and + <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a> + for more information.</p> + + + + + + + + + + + + + + 4.0.0+ + The conditions within the build runtime environment which will trigger the + automatic inclusion of the build profile. Multiple conditions can be defined, which must + be all satisfied to activate the profile. + + + + + + 4.0.0+ + If set to true, this profile will be active unless another profile in this + pom is activated using the command line -P option or by one of that profile's + activators. + + + + + 4.0.0+ + + + Specifies that this profile will be activated when a matching JDK is detected. 
+ For example, <code>1.4</code> only activates on JDKs versioned 1.4, + while <code>!1.4</code> matches any JDK that is not version 1.4. Ranges are supported too: + <code>[1.5,)</code> activates when the JDK is 1.5 minimum. + + + + + + + 4.0.0+ + Specifies that this profile will be activated when matching operating system + attributes are detected. + + + + + 4.0.0+ + Specifies that this profile will be activated when this system property is + specified. + + + + + 4.0.0+ + Specifies that this profile will be activated based on existence of a file. + + + + + + + 4.0.0+ + This is the property specification used to activate a profile. If the value field + is empty, then the existence of the named property will activate the profile, otherwise it + does a case-sensitive match against the property value as well. + + + + + 4.0.0+ + The name of the property to be used to activate a profile. + + + + + 4.0.0+ + The value of the property required to activate a profile. + + + + + + + 4.0.0+ + This is an activator which will detect an operating system's attributes in order + to activate its profile. + + + + + 4.0.0+ + + + The name of the operating system to be used to activate the profile. This must be an exact match + of the <code>${os.name}</code> Java property, such as <code>Windows XP</code>. + + + + + + + 4.0.0+ + + + The general family of the OS to be used to activate the profile, such as + <code>windows</code> or <code>unix</code>. + + + + + + + 4.0.0+ + The architecture of the operating system to be used to activate the + profile. + + + + + 4.0.0+ + The version of the operating system to be used to activate the + profile. + + + + + + + 4.0.0+ + This is the file specification used to activate the profile. The <code>missing</code> value + is the location of a file that needs to exist, and if it doesn't, the profile will be + activated. On the other hand, <code>exists</code> will test for the existence of the file and if it is + there, the profile will be activated.<br/> + Variable interpolation for these file specifications is limited to <code>${basedir}</code>, + System properties and request properties. + + + + + 4.0.0+ + The name of the file that must be missing to activate the + profile. + + + + + 4.0.0+ + The name of the file that must exist to activate the profile. + + + + + + + 4.0.0+ + Section for management of default dependency information for use in a group of + POMs. + + + + + 4.0.0+ + The dependencies specified here are not used until they are referenced in a + POM within the group. This allows the specification of a "standard" version for a + particular dependency. + + + + + + + + + + + + 3.0.0+ + + + The <code>&lt;build&gt;</code> element contains informations required to build the project. + Default values are defined in Super POM. + + + + + + + 3.0.0+ + + This element specifies a directory containing the source of the project. The + generated build system will compile the sources from this directory when the project is + built. The path given is relative to the project descriptor. + The default value is <code>src/main/java</code>. + + + + + + 4.0.0+ + + This element specifies a directory containing the script sources of the + project. This directory is meant to be different from the sourceDirectory, in that its + contents will be copied to the output directory in most cases (since scripts are + interpreted rather than compiled). + The default value is <code>src/main/scripts</code>. 
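Combining the activation conditions documented above, a profile that triggers only when every condition holds might look like this hypothetical sketch (the property name and marker file are invented):

```
<profile>
  <id>example-ci</id>
  <activation>
    <activeByDefault>false</activeByDefault>
    <jdk>[1.5,)</jdk>                        <!-- range syntax: JDK 1.5 minimum -->
    <os>
      <family>unix</family>
    </os>
    <property>
      <name>env.BUILD_ENV</name>
      <value>ci</value>                      <!-- case-sensitive match -->
    </property>
    <file>
      <exists>${basedir}/ci.marker</exists>  <!-- activates if this file exists -->
    </file>
  </activation>
</profile>
```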
+ + + + + + 4.0.0+ + + This element specifies a directory containing the unit test source of the + project. The generated build system will compile these directories when the project is + being tested. The path given is relative to the project descriptor. + The default value is <code>src/test/java</code>. + + + + + + 4.0.0+ + + The directory where compiled application classes are placed. + The default value is <code>target/classes</code>. + + + + + + 4.0.0+ + + The directory where compiled test classes are placed. + The default value is <code>target/test-classes</code>. + + + + + + 4.0.0+ + A set of build extensions to use from this project. + + + + + + + + + + 3.0.0+ + The default goal (or phase in Maven 2) to execute when none is specified for + the project. Note that in case of a multi-module build, only the default goal of the top-level + project is relevant, i.e. the default goals of child modules are ignored. Since Maven 3, + multiple goals/phases can be separated by whitespace. + + + + + 3.0.0+ + + This element describes all of the classpath resources such as properties + files associated with a project. These resources are often included in the final + package. + The default value is <code>src/main/resources</code>. + + + + + + + + + + + 4.0.0+ + + This element describes all of the classpath resources such as properties + files associated with a project's unit tests. + The default value is <code>src/test/resources</code>. + + + + + + + + + + + 4.0.0+ + + The directory where all files generated by the build are placed. + The default value is <code>target</code>. + + + + + + 4.0.0+ + + + The filename (excluding the extension, and with no path information) that + the produced artifact will be called. + The default value is <code>${artifactId}-${version}</code>. + + + + + + + 4.0.0+ + The list of filter properties files that are used when filtering is enabled. + + + + + + + + + + 4.0.0+ + Default plugin information to be made available for reference by projects + derived from this one. This plugin configuration will not be resolved or bound to the + lifecycle unless referenced. Any local configuration for a given plugin will override + the plugin's entire definition here. + + + + + 4.0.0+ + The list of plugins to use. + + + + + + + + + + + + 4.0.0+ + Describes a build extension to utilise. + + + + + 4.0.0+ + The group ID of the extension's artifact. + + + + + 4.0.0+ + The artifact ID of the extension. + + + + + 4.0.0+ + The version of the extension. + + + + + + + 3.0.0+ + Describes the licenses for this project. This is used to generate the license + page of the project's web site, as well as being taken into consideration in other reporting + and validation. The licenses listed for the project are that of the project itself, and not + of dependencies. + + + + + 3.0.0+ + The full legal name of the license. + + + + + 3.0.0+ + The official url for the license text. + + + + + 3.0.0+ + + + The primary method by which this project may be distributed. + <dl> + <dt>repo</dt> + <dd>may be downloaded from the Maven repository</dd> + <dt>manual</dt> + <dd>user must manually download and install the dependency.</dd> + </dl> + + + + + + + 3.0.0+ + Addendum information pertaining to this license. + + + + + + + 3.0.0+ + This element describes all of the mailing lists associated with a project. The + auto-generated site references this information. + + + + + 3.0.0+ + + + The name of the mailing list. 
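As an example of the license fields just described, using the `distribution` values enumerated above (the license named is real; its use here is purely illustrative):

```
<licenses>
  <license>
    <name>Apache License, Version 2.0</name>
    <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
    <distribution>repo</distribution>  <!-- downloadable from the Maven repository -->
    <comments>A business-friendly OSS license</comments>
  </license>
</licenses>
```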
+ + + + + + + 3.0.0+ + + + The email address or link that can be used to subscribe to + the mailing list. If this is an email address, a + <code>mailto:</code> link will automatically be created + when the documentation is created. + + + + + + + 3.0.0+ + + + The email address or link that can be used to unsubscribe to + the mailing list. If this is an email address, a + <code>mailto:</code> link will automatically be created + when the documentation is created. + + + + + + + 3.0.0+ + + + The email address or link that can be used to post to + the mailing list. If this is an email address, a + <code>mailto:</code> link will automatically be created + when the documentation is created. + + + + + + + 3.0.0+ + The link to a URL where you can browse the mailing list archive. + + + + + 3.0.0+ + The link to alternate URLs where you can browse the list archive. + + + + + + + + + + + + 3.0.0+ + Information about one of the committers on this project. + + + + + 3.0.0+ + The unique ID of the developer in the SCM. + + + + + 3.0.0+ + The full name of the contributor. + + + + + 3.0.0+ + The email address of the contributor. + + + + + 3.0.0+ + The URL for the homepage of the contributor. + + + + + 3.0.0+ + The organization to which the contributor belongs. + + + + + 3.0.0+ + The URL of the organization. + + + + + 3.0.0+ + + + The roles the contributor plays in the project. Each role is described by a + <code>role</code> element, the body of which is a role name. This can also be used to + describe the contribution. + + + + + + + + + + + + 3.0.0+ + + + The timezone the contributor is in. Typically, this is a number in the range + <a href="http://en.wikipedia.org/wiki/UTC%E2%88%9212:00">-12</a> to <a href="http://en.wikipedia.org/wiki/UTC%2B14:00">+14</a> + or a valid time zone id like "America/Montreal" (UTC-05:00) or "Europe/Paris" (UTC+01:00). + + + + + + + 3.0.0+ + Properties about the contributor, such as an instant messenger handle. + + + + + + + + + + \ No newline at end of file diff --git a/alluxio/pom.xml b/alluxio/pom.xml index e788c2a7c32..38135b81793 100644 --- a/alluxio/pom.xml +++ b/alluxio/pom.xml @@ -22,20 +22,20 @@ zeppelin org.apache.zeppelin - 0.6.2-SNAPSHOT + 0.8.0-SNAPSHOT .. 
org.apache.zeppelin zeppelin-alluxio jar - 0.6.2-SNAPSHOT + 0.8.0-SNAPSHOT Zeppelin: Alluxio interpreter 1.0.0 - 1.6.1 + org.apache.zeppelin @@ -75,35 +75,30 @@ org.mockito mockito-all - 1.10.8 test org.powermock powermock-api-mockito - ${powermock.version} test org.powermock powermock-core - ${powermock.version} test org.powermock powermock-module-junit4 - ${powermock.version} test org.powermock powermock-reflect - ${powermock.version} test @@ -133,7 +128,6 @@ maven-enforcer-plugin - 1.3.1 enforce @@ -144,7 +138,6 @@ maven-dependency-plugin - 2.8 copy-dependencies diff --git a/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java b/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java index a6fed71f327..8eb152bae71 100644 --- a/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java +++ b/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java @@ -23,9 +23,9 @@ import java.io.ByteArrayOutputStream; import java.util.*; +import org.apache.zeppelin.completer.CompletionType; import org.apache.zeppelin.interpreter.Interpreter; import org.apache.zeppelin.interpreter.InterpreterContext; -import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder; import org.apache.zeppelin.interpreter.InterpreterResult; import org.apache.zeppelin.interpreter.InterpreterResult.Code; import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion; @@ -68,15 +68,6 @@ public AlluxioInterpreter(Properties property) { alluxioMasterPort = property.getProperty(ALLUXIO_MASTER_PORT); } - static { - Interpreter.register("alluxio", "alluxio", - AlluxioInterpreter.class.getName(), - new InterpreterPropertyBuilder() - .add(ALLUXIO_MASTER_HOSTNAME, "localhost", "Alluxio master hostname") - .add(ALLUXIO_MASTER_PORT, "19998", "Alluxio master port") - .build()); - } - @Override public void open() { logger.info("Starting Alluxio shell to connect to " + alluxioMasterHostname + @@ -116,14 +107,14 @@ private InterpreterResult interpret(String[] commands, InterpreterContext contex System.setOut(ps); for (String command : commands) { - int commandResuld = 1; + int commandResult = 1; String[] args = splitAndRemoveEmpty(command, " "); if (args.length > 0 && args[0].equals("help")) { System.out.println(getCommandList()); } else { - commandResuld = fs.run(args); + commandResult = fs.run(args); } - if (commandResuld != 0) { + if (commandResult != 0) { isSuccess = false; break; } else { @@ -144,7 +135,7 @@ private InterpreterResult interpret(String[] commands, InterpreterContext contex private String[] splitAndRemoveEmpty(String st, String splitSeparator) { String[] voices = st.split(splitSeparator); - ArrayList result = new ArrayList(); + ArrayList result = new ArrayList<>(); for (String voice : voices) { if (!voice.trim().isEmpty()) { result.add(voice); @@ -154,7 +145,7 @@ private String[] splitAndRemoveEmpty(String st, String splitSeparator) { } private String[] splitAndRemoveEmpty(String[] sts, String splitSeparator) { - ArrayList result = new ArrayList(); + ArrayList result = new ArrayList<>(); for (String st : sts) { result.addAll(Arrays.asList(splitAndRemoveEmpty(st, splitSeparator))); } @@ -175,16 +166,18 @@ public int getProgress(InterpreterContext context) { } @Override - public List completion(String buf, int cursor) { + public List completion(String buf, int cursor, + InterpreterContext interpreterContext) { String[] words = splitAndRemoveEmpty(splitAndRemoveEmpty(buf, "\n"), " "); String lastWord = ""; if (words.length > 0) { lastWord = words[ 
words.length - 1 ]; } - ArrayList voices = new ArrayList<>(); + + List voices = new LinkedList<>(); for (String command : keywords) { if (command.startsWith(lastWord)) { - voices.add(command); + voices.add(new InterpreterCompletion(command, command, CompletionType.command.name())); } } return voices; diff --git a/alluxio/src/main/resources/interpreter-setting.json b/alluxio/src/main/resources/interpreter-setting.json new file mode 100644 index 00000000000..8b082abde4b --- /dev/null +++ b/alluxio/src/main/resources/interpreter-setting.json @@ -0,0 +1,24 @@ +[ + { + "group": "alluxio", + "name": "alluxio", + "className": "org.apache.zeppelin.alluxio.AlluxioInterpreter", + "properties": { + "alluxio.master.hostname": { + "envName": "ALLUXIO_MASTER_HOSTNAME", + "propertyName": "alluxio.master.hostname", + "defaultValue": "localhost", + "description": "Alluxio master hostname" + }, + "alluxio.master.port": { + "envName": "ALLUXIO_MASTER_PORT", + "propertyName": "alluxio.master.port", + "defaultValue": "19998", + "description": "Alluxio master port" + } + }, + "editor": { + "editOnDblClick": false + } + } +] diff --git a/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java b/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java index 61d97b50d5e..e272a51e507 100644 --- a/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java +++ b/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java @@ -29,6 +29,8 @@ import alluxio.client.WriteType; import alluxio.client.file.URIStatus; + +import org.apache.zeppelin.completer.CompletionType; import org.apache.zeppelin.interpreter.InterpreterResult; import org.apache.zeppelin.interpreter.InterpreterResult.Code; import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion; @@ -77,25 +79,40 @@ public final void before() throws Exception { @Test public void testCompletion() { - List expectedResultOne = Arrays.asList("cat", "chgrp", - "chmod", "chown", "copyFromLocal", "copyToLocal", "count", - "createLineage"); - List expectedResultTwo = Arrays.asList("copyFromLocal", - "copyToLocal", "count"); - List expectedResultThree = Arrays.asList("copyFromLocal", "copyToLocal"); - List expectedResultNone = new ArrayList(); - - List resultOne = alluxioInterpreter.completion("c", 0); - List resultTwo = alluxioInterpreter.completion("co", 0); - List resultThree = alluxioInterpreter.completion("copy", 0); - List resultNotMatch = alluxioInterpreter.completion("notMatch", 0); - List resultAll = alluxioInterpreter.completion("", 0); + List expectedResultOne = Arrays.asList( + new InterpreterCompletion("cat", "cat", CompletionType.command.name()), + new InterpreterCompletion("chgrp", "chgrp", CompletionType.command.name()), + new InterpreterCompletion("chmod", "chmod", CompletionType.command.name()), + new InterpreterCompletion("chown", "chown", CompletionType.command.name()), + new InterpreterCompletion("copyFromLocal", "copyFromLocal", CompletionType.command.name()), + new InterpreterCompletion("copyToLocal", "copyToLocal", CompletionType.command.name()), + new InterpreterCompletion("count", "count", CompletionType.command.name()), + new InterpreterCompletion("createLineage", "createLineage", CompletionType.command.name())); + List expectedResultTwo = Arrays.asList( + new InterpreterCompletion("copyFromLocal", "copyFromLocal", CompletionType.command.name()), + new InterpreterCompletion("copyToLocal", "copyToLocal", CompletionType.command.name()), + new 
InterpreterCompletion("count", "count", CompletionType.command.name())); + List expectedResultThree = Arrays.asList( + new InterpreterCompletion("copyFromLocal", "copyFromLocal", CompletionType.command.name()), + new InterpreterCompletion("copyToLocal", "copyToLocal", CompletionType.command.name())); + List expectedResultNone = new ArrayList<>(); + + List resultOne = alluxioInterpreter.completion("c", 0, null); + List resultTwo = alluxioInterpreter.completion("co", 0, null); + List resultThree = alluxioInterpreter.completion("copy", 0, null); + List resultNotMatch = alluxioInterpreter.completion("notMatch", 0, null); + List resultAll = alluxioInterpreter.completion("", 0, null); Assert.assertEquals(expectedResultOne, resultOne); Assert.assertEquals(expectedResultTwo, resultTwo); Assert.assertEquals(expectedResultThree, resultThree); Assert.assertEquals(expectedResultNone, resultNotMatch); - Assert.assertEquals(alluxioInterpreter.keywords, resultAll); + + List allCompletionList = new ArrayList<>(); + for (InterpreterCompletion ic : resultAll) { + allCompletionList.add(ic.getName()); + } + Assert.assertEquals(alluxioInterpreter.keywords, allCompletionList); } @Test @@ -107,7 +124,7 @@ public void catDirectoryTest() throws IOException { "\ncat /testDir", null); Assert.assertEquals(Code.ERROR, output.code()); - Assert.assertEquals(expected, output.message()); + Assert.assertEquals(expected, output.message().get(0).getData()); } @Test @@ -126,7 +143,7 @@ public void catTest() throws IOException { Assert.assertEquals(Code.SUCCESS, output.code()); Assert.assertArrayEquals(expected, - output.message().substring(0, output.message().length() - 1).getBytes()); + output.message().get(0).getData().substring(0, output.message().get(0).getData().length() - 1).getBytes()); } @Test @@ -142,7 +159,7 @@ public void copyFromLocalLargeTest() throws IOException, AlluxioException { testFile.getAbsolutePath() + " /testFile", null); Assert.assertEquals( "Copied " + testFile.getAbsolutePath() + " to /testFile\n\n", - output.message()); + output.message().get(0).getData()); long fileLength = fs.getStatus(new AlluxioURI("/testFile")).getLength(); Assert.assertEquals(SIZE_BYTES, fileLength); @@ -200,7 +217,7 @@ public void copyFromLocalTest() throws IOException, AlluxioException { testFile.getParent() + " /testDir", null); Assert.assertEquals( "Copied " + testFile.getParent() + " to /testDir\n\n", - output.message()); + output.message().get(0).getData()); long fileLength1 = fs.getStatus(new AlluxioURI("/testDir/testFile")).getLength(); long fileLength2 = fs.getStatus(new AlluxioURI("/testDir/testDirInner/testFile2")).getLength(); @@ -227,7 +244,7 @@ public void copyFromLocalTestWithFullURI() throws IOException, AlluxioException testFile.getPath() + " " + uri, null); Assert.assertEquals( "Copied " + testFile.getPath() + " to " + uri + "\n\n", - output.message()); + output.message().get(0).getData()); long fileLength = fs.getStatus(new AlluxioURI("/destFileURI")).getLength(); Assert.assertEquals(10L, fileLength); @@ -275,7 +292,7 @@ private void copyToLocalWithBytes(int bytes) throws IOException { Assert.assertEquals( "Copied /testFile to " + mLocalAlluxioCluster.getAlluxioHome() + "/testFile\n\n", - output.message()); + output.message().get(0).getData()); fileReadTest("/testFile", 10); } @@ -284,7 +301,7 @@ public void countNotExistTest() throws IOException { InterpreterResult output = alluxioInterpreter.interpret("count /NotExistFile", null); Assert.assertEquals(Code.ERROR, output.code()); 
Assert.assertEquals(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage("/NotExistFile") + "\n", - output.message()); + output.message().get(0).getData()); } @Test @@ -303,14 +320,14 @@ public void countTest() throws IOException { expected += String.format(format, "File Count", "Folder Count", "Total Bytes"); expected += String.format(format, 3, 2, 60); expected += "\n"; - Assert.assertEquals(expected, output.message()); + Assert.assertEquals(expected, output.message().get(0).getData()); } @Test public void fileinfoNotExistTest() throws IOException { InterpreterResult output = alluxioInterpreter.interpret("fileInfo /NotExistFile", null); Assert.assertEquals(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage("/NotExistFile") + "\n", - output.message()); + output.message().get(0).getData()); Assert.assertEquals(Code.ERROR, output.code()); } @@ -318,7 +335,7 @@ public void fileinfoNotExistTest() throws IOException { public void locationNotExistTest() throws IOException { InterpreterResult output = alluxioInterpreter.interpret("location /NotExistFile", null); Assert.assertEquals(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage("/NotExistFile") + "\n", - output.message()); + output.message().get(0).getData()); Assert.assertEquals(Code.ERROR, output.code()); } @@ -351,7 +368,7 @@ public void lsTest() throws IOException, AlluxioException { expected += "\n"; Assert.assertEquals(Code.SUCCESS, output.code()); - Assert.assertEquals(expected, output.message()); + Assert.assertEquals(expected, output.message().get(0).getData()); } @Test @@ -391,7 +408,7 @@ public void lsRecursiveTest() throws IOException, AlluxioException { "/testRoot/testFileC"); expected += "\n"; - Assert.assertEquals(expected, output.message()); + Assert.assertEquals(expected, output.message().get(0).getData()); } @Test @@ -402,7 +419,7 @@ public void mkdirComplexPathTest() throws IOException, AlluxioException { boolean existsDir = fs.exists(new AlluxioURI("/Complex!@#$%^&*()-_=+[]{};\"'<>,.?/File")); Assert.assertEquals( "Successfully created directory /Complex!@#$%^&*()-_=+[]{};\"'<>,.?/File\n\n", - output.message()); + output.message().get(0).getData()); Assert.assertTrue(existsDir); } @@ -426,7 +443,7 @@ public void mkdirShortPathTest() throws IOException, AlluxioException { boolean existsDir = fs.exists(new AlluxioURI("/root/testFile1")); Assert.assertEquals( "Successfully created directory /root/testFile1\n\n", - output.message()); + output.message().get(0).getData()); Assert.assertTrue(existsDir); } @@ -439,7 +456,7 @@ public void mkdirTest() throws IOException, AlluxioException { boolean existsDir = fs.exists(new AlluxioURI("/root/testFile1")); Assert.assertEquals( "Successfully created directory " + qualifiedPath + "\n\n", - output.message()); + output.message().get(0).getData()); Assert.assertTrue(existsDir); } diff --git a/angular/pom.xml b/angular/pom.xml index 4358dae1e5d..be43e496a49 100644 --- a/angular/pom.xml +++ b/angular/pom.xml @@ -22,14 +22,14 @@ zeppelin org.apache.zeppelin - 0.6.2-SNAPSHOT + 0.8.0-SNAPSHOT .. 
org.apache.zeppelin zeppelin-angular jar
- 0.6.2-SNAPSHOT
+ 0.8.0-SNAPSHOT
Zeppelin: Angular interpreter

@@ -61,8 +61,7 @@
maven-enforcer-plugin
- 1.3.1
-
+
enforce none
@@ -72,7 +71,6 @@
maven-dependency-plugin
- 2.8
copy-dependencies
diff --git a/angular/src/main/java/org/apache/zeppelin/angular/AngularInterpreter.java b/angular/src/main/java/org/apache/zeppelin/angular/AngularInterpreter.java
index 1b65f0f3b0d..696e4503192 100644
--- a/angular/src/main/java/org/apache/zeppelin/angular/AngularInterpreter.java
+++ b/angular/src/main/java/org/apache/zeppelin/angular/AngularInterpreter.java
@@ -34,9 +34,6 @@
  *
  */
 public class AngularInterpreter extends Interpreter {
-  static {
-    Interpreter.register("angular", AngularInterpreter.class.getName());
-  }
 
   public AngularInterpreter(Properties property) {
     super(property);
@@ -70,7 +67,8 @@ public int getProgress(InterpreterContext context) {
   }
 
   @Override
-  public List<InterpreterCompletion> completion(String buf, int cursor) {
+  public List<InterpreterCompletion> completion(String buf, int cursor,
+      InterpreterContext interpreterContext) {
     return new LinkedList<>();
   }
 
diff --git a/angular/src/main/resources/interpreter-setting.json b/angular/src/main/resources/interpreter-setting.json
new file mode 100644
index 00000000000..4ff59781b48
--- /dev/null
+++ b/angular/src/main/resources/interpreter-setting.json
@@ -0,0 +1,12 @@
+[
+  {
+    "group": "angular",
+    "name": "angular",
+    "className": "org.apache.zeppelin.angular.AngularInterpreter",
+    "properties": {
+    },
+    "editor": {
+      "editOnDblClick": true
+    }
+  }
+]
diff --git a/beam/README.md b/beam/README.md
new file mode 100644
index 00000000000..57150a0208a
--- /dev/null
+++ b/beam/README.md
@@ -0,0 +1,25 @@
+# Overview
+Beam interpreter for Apache Zeppelin
+
+# Architecture
+The current interpreter implementation supports a static REPL. It compiles the code in memory, executes it, and redirects the output to Zeppelin.
+
+## Building the Beam Interpreter
+You first have to build the Beam interpreter by enabling the **beam** profile as follows:
+
+```
+mvn clean package -Pbeam -DskipTests
+```
+
+### Notice
+- The Flink runner ships binaries compiled for Scala 2.10, so currently only Scala 2.10 is supported.
+
+### Technical overview
+
+ * Upon starting an interpreter, an instance of `JavaCompiler` is created.
+
+ * When the user runs commands with Beam, the `JavaParser` goes through the code to find a class that contains the main method.
+
+ * It then replaces that class name with a randomly generated class name to avoid collisions during compilation, and creates new out and err streams so that output is captured and redirected to Zeppelin instead of the console.
+
+ * Any error raised during compilation is caught and redirected to Zeppelin.
diff --git a/beam/pom.xml b/beam/pom.xml
new file mode 100644
index 00000000000..c02695c460d
--- /dev/null
+++ b/beam/pom.xml
@@ -0,0 +1,300 @@
+
+
+
+
+ 4.0.0
+
+
+ zeppelin
+ org.apache.zeppelin
+ 0.8.0-SNAPSHOT
+ ..
+ + + org.apache.zeppelin + zeppelin-beam + jar + 0.8.0-SNAPSHOT + Zeppelin: Beam interpreter + + + 2.3.0 + 1.6.2 + 0.2.0-incubating + + + 4.1.1.Final + 3.1.0 + 1.3 + + + + + io.netty + netty-all + ${netty.version} + + + + org.apache.spark + spark-core_2.10 + ${beam.spark.version} + + + slf4j-log4j12 + org.slf4j + + + netty-all + io.netty + + + akka-actor_2.10 + org.spark-project.akka + + + akka-remote_2.10 + org.spark-project.akka + + + akka-slf4j_2.10 + org.spark-project.akka + + + + + + org.apache.spark + spark-streaming_2.10 + ${beam.spark.version} + + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${beam.hadoop.version} + + + slf4j-log4j12 + org.slf4j + + + + + + org.apache.hadoop + hadoop-common + ${beam.hadoop.version} + + + slf4j-log4j12 + org.slf4j + + + + + + org.apache.zeppelin + zeppelin-scio_${scala.binary.version} + ${project.version} + + + + org.apache.hadoop + hadoop-hdfs + ${beam.hadoop.version} + + + + org.apache.hadoop + hadoop-client + ${beam.hadoop.version} + + + slf4j-log4j12 + org.slf4j + + + + + + org.apache.hadoop + hadoop-annotations + ${beam.hadoop.version} + + + + org.apache.hadoop + hadoop-yarn-common + ${beam.hadoop.version} + + + + org.apache.hadoop + hadoop-mapreduce-client-common + ${beam.hadoop.version} + + + slf4j-log4j12 + org.slf4j + + + + + + com.thoughtworks.qdox + qdox + 2.0-M3 + + + + org.apache.beam + beam-runners-parent + ${beam.beam.version} + pom + + + + org.apache.beam + beam-runners-core-java + ${beam.beam.version} + + + google-http-client-jackson2 + com.google.http-client + + + + + + org.apache.beam + beam-runners-direct-java + ${beam.beam.version} + + + + javax.servlet + javax.servlet-api + ${servlet.api.version} + + + + org.apache.beam + beam-runners-google-cloud-dataflow-java + ${beam.beam.version} + + + google-http-client-jackson2 + com.google.http-client + + + + + + org.apache.beam + beam-runners-spark + ${beam.beam.version} + jar + + + + ${project.groupId} + zeppelin-interpreter + ${project.version} + provided + + + + org.apache.commons + commons-exec + ${commons.exec.version} + + + + junit + junit + test + + + + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + maven-enforcer-plugin + + + enforce + none + + + + + + maven-dependency-plugin + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/../../interpreter/beam + false + false + true + runtime + + + + copy-artifact + package + + copy + + + ${project.build.directory}/../../interpreter/beam + false + false + true + runtime + + + ${project.groupId} + ${project.artifactId} + ${project.version} + ${project.packaging} + + + + + + + + + + diff --git a/beam/src/main/java/org/apache/zeppelin/beam/BeamInterpreter.java b/beam/src/main/java/org/apache/zeppelin/beam/BeamInterpreter.java new file mode 100644 index 00000000000..37ccfae44fe --- /dev/null +++ b/beam/src/main/java/org/apache/zeppelin/beam/BeamInterpreter.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.beam;
+
+import java.io.File;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+import java.util.UUID;
+
+import org.apache.zeppelin.interpreter.Interpreter;
+import org.apache.zeppelin.interpreter.InterpreterContext;
+import org.apache.zeppelin.interpreter.InterpreterResult;
+import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Beam interpreter
+ *
+ */
+public class BeamInterpreter extends Interpreter {
+
+  Logger logger = LoggerFactory.getLogger(BeamInterpreter.class);
+
+  public BeamInterpreter(Properties property) {
+    super(property);
+  }
+
+  @Override
+  public void open() {
+
+  }
+
+  @Override
+  public void close() {
+    // delete all .class files created during the compilation process
+    File[] files = new File(".").listFiles();
+    if (files != null) {
+      for (File f : files) {
+        if (f.getAbsolutePath().endsWith(".class")) {
+          f.delete();
+        }
+      }
+    }
+  }
+
+  @Override
+  public InterpreterResult interpret(String code, InterpreterContext context) {
+
+    // choose a fresh name for the class containing the main method
+    String generatedClassName = "C" + UUID.randomUUID().toString().replace("-", "");
+
+    try {
+      String res = StaticRepl.execute(generatedClassName, code);
+      return new InterpreterResult(InterpreterResult.Code.SUCCESS, res);
+    } catch (Exception e) {
+      logger.error("Exception in Interpreter while interpret", e);
+      return new InterpreterResult(InterpreterResult.Code.ERROR, e.getMessage());
+
+    }
+
+  }
+
+  @Override
+  public void cancel(InterpreterContext context) {
+
+  }
+
+  @Override
+  public FormType getFormType() {
+    return FormType.SIMPLE;
+  }
+
+  @Override
+  public int getProgress(InterpreterContext context) {
+    return 0;
+  }
+
+  @Override
+  public List<InterpreterCompletion> completion(String buf, int cursor,
+      InterpreterContext interpreterContext) {
+    return Collections.emptyList();
+  }
+
+}
diff --git a/beam/src/main/java/org/apache/zeppelin/beam/StaticRepl.java b/beam/src/main/java/org/apache/zeppelin/beam/StaticRepl.java
new file mode 100644
index 00000000000..ed81146bb1c
--- /dev/null
+++ b/beam/src/main/java/org/apache/zeppelin/beam/StaticRepl.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.beam;
+
+import javax.tools.Diagnostic;
+import javax.tools.DiagnosticCollector;
+import javax.tools.JavaCompiler;
+import javax.tools.JavaCompiler.CompilationTask;
+import javax.tools.JavaFileObject;
+import javax.tools.SimpleJavaFileObject;
+import javax.tools.ToolProvider;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.thoughtworks.qdox.JavaProjectBuilder;
+import com.thoughtworks.qdox.model.JavaClass;
+import com.thoughtworks.qdox.model.JavaSource;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+import java.io.StringReader;
+import java.lang.reflect.InvocationTargetException;
+import java.net.URI;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ *
+ * StaticRepl compiles the Java code in memory and executes it
+ *
+ */
+public class StaticRepl {
+  static Logger logger = LoggerFactory.getLogger(StaticRepl.class);
+
+  public static String execute(String generatedClassName, String code) throws Exception {
+
+    JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
+    DiagnosticCollector<JavaFileObject> diagnostics = new DiagnosticCollector<>();
+
+    // parse the Java source
+    JavaProjectBuilder builder = new JavaProjectBuilder();
+    JavaSource src = builder.addSource(new StringReader(code));
+
+    // get all classes in the code (paragraph)
+    List<JavaClass> classes = src.getClasses();
+    String mainClassName = null;
+
+    // search for the class containing a static main method
+    for (int i = 0; i < classes.size(); i++) {
+      boolean hasMain = false;
+
+      for (int j = 0; j < classes.get(i).getMethods().size(); j++) {
+        if (classes.get(i).getMethods().get(j).getName().equals("main") && classes.get(i)
+            .getMethods().get(j).isStatic()) {
+          mainClassName = classes.get(i).getName();
+          hasMain = true;
+          break;
+        }
+      }
+      if (hasMain) {
+        break;
+      }
+
+    }
+
+    // if there is no static main method, raise an error
+    if (mainClassName == null) {
+      logger.error("There isn't any class containing a static main method.");
+      throw new Exception("There isn't any class containing a static main method.");
+    }
+
+    // replace the name of the class containing the main method with the generated name
+    code = code.replace(mainClassName, generatedClassName);
+
+    JavaFileObject file = new JavaSourceFromString(generatedClassName, code);
+    Iterable<? extends JavaFileObject> compilationUnits = Arrays.asList(file);
+
+    ByteArrayOutputStream baosOut = new ByteArrayOutputStream();
+    ByteArrayOutputStream baosErr = new ByteArrayOutputStream();
+
+    // create new streams to capture the output data
+    PrintStream newOut = new PrintStream(baosOut);
+    PrintStream newErr = new PrintStream(baosErr);
+    // save the old System.out and System.err
+    PrintStream oldOut = System.out;
+    PrintStream oldErr = System.err;
+    // redirect output to the capturing streams
+    System.setOut(newOut);
+    System.setErr(newErr);
+
+    CompilationTask task = compiler.getTask(null, null, diagnostics, null, null, compilationUnits);
+
+    // run the compilation
+    boolean success = task.call();
+
+    // if compilation failed, collect the diagnostics and raise an error
+    if (!success) {
+      for (Diagnostic<?> diagnostic : diagnostics.getDiagnostics()) {
+        if (diagnostic.getLineNumber() == -1) {
+          continue;
+        }
+        System.err.println("line " + diagnostic.getLineNumber() + " : "
+            + diagnostic.getMessage(null));
+      }
+      System.out.flush();
+      System.err.flush();
+
+      System.setOut(oldOut);
+      System.setErr(oldErr);
+      logger.error("Exception in Interpreter while compilation: {}", baosErr.toString());
+      throw new Exception(baosErr.toString());
+    } else {
+      try {
+
+        // create a new class loader rooted at the working directory
+        URLClassLoader classLoader = URLClassLoader.newInstance(new URL[] { new File("").toURI()
+            .toURL() });
+        // invoke the main method
+        Class.forName(generatedClassName, true, classLoader)
+            .getDeclaredMethod("main", new Class[] { String[].class })
+            .invoke(null, new Object[] { null });
+
+        System.out.flush();
+        System.err.flush();
+
+        // restore the original streams
+        System.setOut(oldOut);
+        System.setErr(oldErr);
+
+        return baosOut.toString();
+
+      } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException
+          | InvocationTargetException e) {
+        logger.error("Exception in Interpreter while execution", e);
+        System.err.println(e);
+        e.printStackTrace(newErr);
+        throw new Exception(baosErr.toString(), e);
+
+      } finally {
+
+        System.out.flush();
+        System.err.flush();
+
+        System.setOut(oldOut);
+        System.setErr(oldErr);
+      }
+    }
+
+  }
+
+}
+
+class JavaSourceFromString extends SimpleJavaFileObject {
+  final String code;
+
+  JavaSourceFromString(String name, String code) {
+    super(URI.create("string:///" + name.replace('.', '/') + Kind.SOURCE.extension), Kind.SOURCE);
+    this.code = code;
+  }
+
+  @Override
+  public CharSequence getCharContent(boolean ignoreEncodingErrors) {
+    return code;
+  }
+}
diff --git a/beam/src/main/resources/interpreter-setting.json b/beam/src/main/resources/interpreter-setting.json
new file mode 100644
index 00000000000..428b76ddd48
--- /dev/null
+++ b/beam/src/main/resources/interpreter-setting.json
@@ -0,0 +1,35 @@
+[
+  {
+    "group": "beam",
+    "name": "beam",
+    "className": "org.apache.zeppelin.beam.BeamInterpreter",
+    "defaultInterpreter": true,
+    "properties": {
+    },
+    "editor": {
+      "editOnDblClick": false
+    }
+  },
+  {
+    "group": "beam",
+    "name": "scio",
+    "className": "org.apache.zeppelin.scio.ScioInterpreter",
+    "properties": {
+      "zeppelin.scio.argz": {
+        "envName": "ZEPPELIN_SCIO_ARGZ",
+        "propertyName": "zeppelin.scio.argz",
+        "defaultValue": "--runner=InProcessPipelineRunner",
+        "description": "Scio interpreter wide arguments"
+      },
+      "zeppelin.scio.maxResult": {
+        "envName": "ZEPPELIN_SCIO_MAXRESULT",
+        "propertyName": "zeppelin.scio.maxResult",
+        "defaultValue": "1000",
+        "description": "Max number of SCollection results to display."
+ } + }, + "editor": { + "language": "scala" + } + } +] diff --git a/beam/src/main/test/org/apache/zeppelin/beam/BeamInterpreterTest.java b/beam/src/main/test/org/apache/zeppelin/beam/BeamInterpreterTest.java new file mode 100644 index 00000000000..1fef7a63993 --- /dev/null +++ b/beam/src/main/test/org/apache/zeppelin/beam/BeamInterpreterTest.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.beam; + +import static org.junit.Assert.assertEquals; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.Properties; +import org.apache.zeppelin.interpreter.InterpreterContext; +import org.apache.zeppelin.interpreter.InterpreterResult; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * + * BeamInterpreterTest + * + */ +public class BeamInterpreterTest { + + private static BeamInterpreter beam; + private static InterpreterContext context; + + @BeforeClass + public static void setUp() { + Properties p = new Properties(); + beam = new BeamInterpreter(p); + beam.open(); + context = new InterpreterContext(null, null, null, null, null, null, null, null, null, null, null, + null); + } + + @AfterClass + public static void tearDown() { + beam.close(); + } + + @Test + public void testStaticRepl() { + + StringWriter writer = new StringWriter(); + PrintWriter out = new PrintWriter(writer); + out.println("public class HelloWorld {"); + out.println(" public static void main(String args[]) {"); + out.println(" System.out.println(\"This is in another java file\");"); + out.println(" }"); + out.println("}"); + out.close(); + + InterpreterResult res = beam.interpret(writer.toString(), context); + + assertEquals(InterpreterResult.Code.SUCCESS, res.code()); + } + + @Test + public void testStaticReplWithoutMain() { + + StringBuffer sourceCode = new StringBuffer(); + sourceCode.append("package org.mdkt;\n"); + sourceCode.append("public class HelloClass {\n"); + sourceCode.append(" public String hello() { return \"hello\"; }"); + sourceCode.append("}"); + InterpreterResult res = beam.interpret(sourceCode.toString(), context); + assertEquals(InterpreterResult.Code.ERROR, res.code()); + } + + @Test + public void testStaticReplWithSyntaxError() { + + StringWriter writer = new StringWriter(); + PrintWriter out = new PrintWriter(writer); + out.println("public class HelloWorld {"); + out.println(" public static void main(String args[]) {"); + out.println(" System.out.prin(\"This is in another java file\");"); + out.println(" }"); + out.println("}"); + out.close(); + InterpreterResult res = beam.interpret(writer.toString(), context); + + assertEquals(InterpreterResult.Code.ERROR, res.code()); + } + +} diff --git a/bigquery/pom.xml b/bigquery/pom.xml index 
ce16360b10e..f974b988153 100644 --- a/bigquery/pom.xml +++ b/bigquery/pom.xml @@ -23,21 +23,32 @@ zeppelin org.apache.zeppelin - 0.6.2-SNAPSHOT + 0.8.0-SNAPSHOT org.apache.zeppelin zeppelin-bigquery jar - 0.6.2-SNAPSHOT + 0.8.0-SNAPSHOT Zeppelin: BigQuery interpreter + + 1.21.0 + 1.21.0 + UTF-8 + **/BigQueryInterpreterTest.java + + + v2-rev265-1.21.0 + 2.6 + + com.google.apis google-api-services-bigquery - v2-rev265-1.21.0 + ${bigquery.api.version} com.google.oauth-client @@ -57,7 +68,7 @@ com.google.code.gson gson - 2.6 + ${gson.version} @@ -84,18 +95,10 @@ - - 1.21.0 - 1.21.0 - UTF-8 - **/BigQueryInterpreterTest.java - - maven-enforcer-plugin - 1.3.1 enforce @@ -116,7 +119,6 @@ maven-dependency-plugin - 2.8 copy-dependencies diff --git a/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java b/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java index 33e196003b0..d0c23e5fdec 100644 --- a/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java +++ b/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java @@ -332,7 +332,8 @@ public void cancel(InterpreterContext context) { } @Override - public List completion(String buf, int cursor) { + public List completion(String buf, int cursor, + InterpreterContext interpreterContext) { return NO_COMPLETION; } } diff --git a/bigquery/src/main/resources/interpreter-setting.json b/bigquery/src/main/resources/interpreter-setting.json index 3e524ed8362..b99a7639631 100644 --- a/bigquery/src/main/resources/interpreter-setting.json +++ b/bigquery/src/main/resources/interpreter-setting.json @@ -22,6 +22,10 @@ "defaultValue": "100000", "description": "Maximum number of rows to fetch from BigQuery" } + }, + "editor": { + "language": "sql", + "editOnDblClick": false } } ] diff --git a/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java b/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java index add109b60c8..53c4dc30943 100644 --- a/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java +++ b/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java @@ -104,7 +104,7 @@ public void sqlSuccess() { InterpreterResult ret = bqInterpreter.interpret(CONSTANTS.getOne(), context); assertEquals(InterpreterResult.Code.SUCCESS, ret.code()); - assertEquals(ret.type(), InterpreterResult.Type.TABLE); + assertEquals(ret.message().get(0).getType(), InterpreterResult.Type.TABLE); } diff --git a/bin/common.cmd b/bin/common.cmd index b4fb6bf4d23..13f33e5484c 100644 --- a/bin/common.cmd +++ b/bin/common.cmd @@ -29,10 +29,6 @@ if not defined ZEPPELIN_LOG_DIR ( set ZEPPELIN_LOG_DIR=%ZEPPELIN_HOME%\logs ) -if not defined ZEPPELIN_NOTEBOOK_DIR ( - set ZEPPELIN_NOTEBOOK_DIR=%ZEPPELIN_HOME%\notebook -) - if not defined ZEPPELIN_PID_DIR ( set ZEPPELIN_PID_DIR=%ZEPPELIN_HOME%\run ) @@ -47,10 +43,6 @@ if not defined ZEPPELIN_WAR ( ) ) -if not defined ZEPPELIN_INTERPRETER_DIR ( - set ZEPPELIN_INTERPRETER_DIR=%ZEPPELIN_HOME%\interpreter -) - if exist "%ZEPPELIN_CONF_DIR%\zeppelin-env.cmd" ( call "%ZEPPELIN_CONF_DIR%\zeppelin-env.cmd" ) @@ -69,6 +61,10 @@ if not defined ZEPPELIN_MEM ( set ZEPPELIN_MEM=-Xms1024m -Xmx1024m -XX:MaxPermSize=512m ) +if not defined ZEPPELIN_INTP_MEM ( + set ZEPPELIN_INTP_MEM=-Xms1024m -Xmx1024m -XX:MaxPermSize=512m +) + if not defined ZEPPELIN_JAVA_OPTS ( set ZEPPELIN_JAVA_OPTS=-Dfile.encoding=%ZEPPELIN_ENCODING% %ZEPPELIN_MEM% ) else ( @@ -94,10 +90,6 @@ if not defined 
ZEPPELIN_IDENT_STRING ( set ZEPPELIN_IDENT_STRING=%USERNAME% ) -if not defined DEBUG ( - set DEBUG=0 -) - if not defined ZEPPELIN_INTERPRETER_REMOTE_RUNNER ( set ZEPPELIN_INTERPRETER_REMOTE_RUNNER=bin\interpreter.cmd ) diff --git a/bin/common.sh b/bin/common.sh index b69f28cf0c7..c7100c7d022 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -48,10 +48,6 @@ if [[ -z "${ZEPPELIN_WAR}" ]]; then fi fi -if [[ -z "$ZEPPELIN_INTERPRETER_DIR" ]]; then - export ZEPPELIN_INTERPRETER_DIR="${ZEPPELIN_HOME}/interpreter" -fi - if [[ -f "${ZEPPELIN_CONF_DIR}/zeppelin-env.sh" ]]; then . "${ZEPPELIN_CONF_DIR}/zeppelin-env.sh" fi @@ -113,10 +109,14 @@ if [[ -z "${ZEPPELIN_ENCODING}" ]]; then export ZEPPELIN_ENCODING="UTF-8" fi -if [[ -z "$ZEPPELIN_MEM" ]]; then +if [[ -z "${ZEPPELIN_MEM}" ]]; then export ZEPPELIN_MEM="-Xms1024m -Xmx1024m -XX:MaxPermSize=512m" fi +if [[ -z "${ZEPPELIN_INTP_MEM}" ]]; then + export ZEPPELIN_INTP_MEM="-Xms1024m -Xmx1024m -XX:MaxPermSize=512m" +fi + JAVA_OPTS+=" ${ZEPPELIN_JAVA_OPTS} -Dfile.encoding=${ZEPPELIN_ENCODING} ${ZEPPELIN_MEM}" JAVA_OPTS+=" -Dlog4j.configuration=file://${ZEPPELIN_CONF_DIR}/log4j.properties" export JAVA_OPTS @@ -131,13 +131,12 @@ if [[ -n "${JAVA_HOME}" ]]; then else ZEPPELIN_RUNNER=java fi - export ZEPPELIN_RUNNER if [[ -z "$ZEPPELIN_IDENT_STRING" ]]; then export ZEPPELIN_IDENT_STRING="${USER}" fi -if [[ -z "$DEBUG" ]]; then - export DEBUG=0 +if [[ -z "$ZEPPELIN_INTERPRETER_REMOTE_RUNNER" ]]; then + export ZEPPELIN_INTERPRETER_REMOTE_RUNNER="bin/interpreter.sh" fi diff --git a/bin/install-interpreter.sh b/bin/install-interpreter.sh index 06be75cbf44..d0c03947c68 100755 --- a/bin/install-interpreter.sh +++ b/bin/install-interpreter.sh @@ -40,6 +40,7 @@ fi addJarInDir "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib" addJarInDir "${ZEPPELIN_HOME}/lib" +addJarInDir "${ZEPPELIN_HOME}/lib/interpreter" CLASSPATH+=":${ZEPPELIN_CLASSPATH}" $ZEPPELIN_RUNNER $JAVA_OPTS -cp $CLASSPATH $ZEPPELIN_INSTALL_INTERPRETER_MAIN ${@} diff --git a/bin/interpreter.cmd b/bin/interpreter.cmd index fd6af3df224..eb59799952f 100644 --- a/bin/interpreter.cmd +++ b/bin/interpreter.cmd @@ -39,11 +39,6 @@ call "%bin%\common.cmd" if exist "%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes" ( set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes" -) else ( - for %%d in ("%ZEPPELIN_HOME%\lib\zeppelin-interpreter*.jar") do ( - set ZEPPELIN_INTERPRETER_JAR=%%d - ) - set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"!ZEPPELIN_INTERPRETER_JAR!" ) REM add test classes for unittest @@ -55,6 +50,7 @@ if exist "%ZEPPELIN_HOME%\zeppelin-zengine\target\test-classes" ( ) call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-interpreter\target\lib" +call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\lib\interpreter" call "%bin%\functions.cmd" ADDJARINDIR "%INTERPRETER_DIR%" set HOSTNAME=%COMPUTERNAME% diff --git a/bin/interpreter.sh b/bin/interpreter.sh index a81c8f21067..29d02218d80 100755 --- a/bin/interpreter.sh +++ b/bin/interpreter.sh @@ -20,10 +20,10 @@ bin=$(dirname "${BASH_SOURCE-$0}") bin=$(cd "${bin}">/dev/null; pwd) function usage() { - echo "usage) $0 -p -d -l " + echo "usage) $0 -p -d -l -g " } -while getopts "hp:d:l:v" o; do +while getopts "hp:d:l:v:u:g:" o; do case ${o} in h) usage @@ -42,6 +42,17 @@ while getopts "hp:d:l:v" o; do . 
"${bin}/common.sh" getZeppelinVersion ;; + u) + ZEPPELIN_IMPERSONATE_USER="${OPTARG}" + if [[ -z "$ZEPPELIN_IMPERSONATE_CMD" ]]; then + ZEPPELIN_IMPERSONATE_RUN_CMD=`echo "ssh ${ZEPPELIN_IMPERSONATE_USER}@localhost" ` + else + ZEPPELIN_IMPERSONATE_RUN_CMD=$(eval "echo ${ZEPPELIN_IMPERSONATE_CMD} ") + fi + ;; + g) + INTERPRETER_GROUP_NAME=${OPTARG} + ;; esac done @@ -53,14 +64,11 @@ fi . "${bin}/common.sh" -ZEPPELIN_INTP_CLASSPATH="" +ZEPPELIN_INTP_CLASSPATH="${CLASSPATH}" # construct classpath if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes" ]]; then ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes" -else - ZEPPELIN_INTERPRETER_JAR="$(ls ${ZEPPELIN_HOME}/lib/zeppelin-interpreter*.jar)" - ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_INTERPRETER_JAR}" fi # add test classes for unittest @@ -71,8 +79,8 @@ if [[ -d "${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes" ]]; then ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes" fi - addJarInDirForIntp "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib" +addJarInDirForIntp "${ZEPPELIN_HOME}/lib/interpreter" addJarInDirForIntp "${INTERPRETER_DIR}" HOSTNAME=$(hostname) @@ -80,7 +88,14 @@ ZEPPELIN_SERVER=org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer INTERPRETER_ID=$(basename "${INTERPRETER_DIR}") ZEPPELIN_PID="${ZEPPELIN_PID_DIR}/zeppelin-interpreter-${INTERPRETER_ID}-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.pid" -ZEPPELIN_LOGFILE="${ZEPPELIN_LOG_DIR}/zeppelin-interpreter-${INTERPRETER_ID}-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.log" +ZEPPELIN_LOGFILE="${ZEPPELIN_LOG_DIR}/zeppelin-interpreter-" +if [[ ! -z "$INTERPRETER_GROUP_NAME" ]]; then + ZEPPELIN_LOGFILE+="${INTERPRETER_GROUP_NAME}-" +fi +if [[ ! -z "$ZEPPELIN_IMPERSONATE_USER" ]]; then + ZEPPELIN_LOGFILE+="${ZEPPELIN_IMPERSONATE_USER}-" +fi +ZEPPELIN_LOGFILE+="${INTERPRETER_ID}-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.log" JAVA_INTP_OPTS+=" -Dzeppelin.log.file=${ZEPPELIN_LOGFILE}" if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then @@ -149,21 +164,61 @@ elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then else echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded" fi +elif [[ "${INTERPRETER_ID}" == "pig" ]]; then + # autodetect HADOOP_CONF_HOME by heuristic + if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then + if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then + export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop" + elif [[ -d "/etc/hadoop/conf" ]]; then + export HADOOP_CONF_DIR="/etc/hadoop/conf" + fi + fi + + if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then + ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}" + fi + + # autodetect TEZ_CONF_DIR + if [[ -n "${TEZ_CONF_DIR}" ]]; then + ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}" + elif [[ -d "/etc/tez/conf" ]]; then + ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf" + else + echo "TEZ_CONF_DIR is not set, configuration might not be loaded" + fi fi addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}" -CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}" +if [[ ! 
-z "$ZEPPELIN_IMPERSONATE_USER" ]]; then + suid="$(id -u ${ZEPPELIN_IMPERSONATE_USER})" + if [[ -n "${suid}" || -z "${SPARK_SUBMIT}" ]]; then + INTERPRETER_RUN_COMMAND=${ZEPPELIN_IMPERSONATE_RUN_CMD}" '" + if [[ -f "${ZEPPELIN_CONF_DIR}/zeppelin-env.sh" ]]; then + INTERPRETER_RUN_COMMAND+=" source "${ZEPPELIN_CONF_DIR}'/zeppelin-env.sh;' + fi + fi +fi if [[ -n "${SPARK_SUBMIT}" ]]; then - ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}" --driver-java-options "${JAVA_INTP_OPTS}" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT} & + if [[ -n "$ZEPPELIN_IMPERSONATE_USER" ]] && [[ "$ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER" != "false" ]]; then + INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} --proxy-user ${ZEPPELIN_IMPERSONATE_USER} ${SPARK_APP_JAR} ${PORT}` + else + INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT}` + fi else - ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} & + INTERPRETER_RUN_COMMAND+=' '` echo ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} ` fi +if [[ ! -z "$ZEPPELIN_IMPERSONATE_USER" ]] && [[ -n "${suid}" || -z "${SPARK_SUBMIT}" ]]; then + INTERPRETER_RUN_COMMAND+="'" +fi + +eval $INTERPRETER_RUN_COMMAND & + pid=$! if [[ -z "${pid}" ]]; then - return 1; + exit 1; else echo ${pid} > ${ZEPPELIN_PID} fi diff --git a/bin/zeppelin-daemon.sh b/bin/zeppelin-daemon.sh index c1d5ef20715..e88c26fc43c 100755 --- a/bin/zeppelin-daemon.sh +++ b/bin/zeppelin-daemon.sh @@ -72,6 +72,7 @@ fi addJarInDir "${ZEPPELIN_HOME}" addJarInDir "${ZEPPELIN_HOME}/lib" +addJarInDir "${ZEPPELIN_HOME}/lib/interpreter" addJarInDir "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib" addJarInDir "${ZEPPELIN_HOME}/zeppelin-zengine/target/lib" addJarInDir "${ZEPPELIN_HOME}/zeppelin-server/target/lib" @@ -161,6 +162,8 @@ function upstart() { # where the service manager starts and stops the process initialize_default_directories + echo "ZEPPELIN_CLASSPATH: ${ZEPPELIN_CLASSPATH_OVERRIDES}:${CLASSPATH}" >> "${ZEPPELIN_OUTFILE}" + $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:$CLASSPATH $ZEPPELIN_MAIN >> "${ZEPPELIN_OUTFILE}" } @@ -177,6 +180,8 @@ function start() { initialize_default_directories + echo "ZEPPELIN_CLASSPATH: ${ZEPPELIN_CLASSPATH_OVERRIDES}:${CLASSPATH}" >> "${ZEPPELIN_OUTFILE}" + nohup nice -n $ZEPPELIN_NICENESS $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:$CLASSPATH $ZEPPELIN_MAIN >> "${ZEPPELIN_OUTFILE}" 2>&1 < /dev/null & pid=$! 
if [[ -z "${pid}" ]]; then @@ -254,6 +259,7 @@ case "${1}" in start ;; restart) + echo "${ZEPPELIN_NAME} is restarting" >> "${ZEPPELIN_OUTFILE}" stop start ;; diff --git a/bin/zeppelin.cmd b/bin/zeppelin.cmd index a2d5644c276..682caf061d2 100644 --- a/bin/zeppelin.cmd +++ b/bin/zeppelin.cmd @@ -56,6 +56,7 @@ if exist "%ZEPPELIN_HOME%\zeppelin-server\target\classes" ( call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%" call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\lib" +call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\lib\interpreter" call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-interpreter\target\lib" call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-zengine\target\lib" call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-server\target\lib" @@ -83,9 +84,4 @@ if not exist %ZEPPELIN_PID_DIR% ( mkdir "%ZEPPELIN_PID_DIR%" ) -if not exist %ZEPPELIN_NOTEBOOK_DIR% ( - echo Notebook dir doesn't exist, create %ZEPPELIN_NOTEBOOK_DIR% - mkdir "%ZEPPELIN_NOTEBOOK_DIR%" -) - "%ZEPPELIN_RUNNER%" %JAVA_OPTS% -cp %CLASSPATH% %ZEPPELIN_SERVER% "%*" diff --git a/bin/zeppelin.sh b/bin/zeppelin.sh index 92d7f7ead96..44fc2cfe89a 100755 --- a/bin/zeppelin.sh +++ b/bin/zeppelin.sh @@ -65,12 +65,13 @@ fi addJarInDir "${ZEPPELIN_HOME}" addJarInDir "${ZEPPELIN_HOME}/lib" +addJarInDir "${ZEPPELIN_HOME}/lib/interpreter" addJarInDir "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib" addJarInDir "${ZEPPELIN_HOME}/zeppelin-zengine/target/lib" addJarInDir "${ZEPPELIN_HOME}/zeppelin-server/target/lib" addJarInDir "${ZEPPELIN_HOME}/zeppelin-web/target/lib" -CLASSPATH+=":${ZEPPELIN_CLASSPATH}" +ZEPPELIN_CLASSPATH="$CLASSPATH:$ZEPPELIN_CLASSPATH" if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then echo "Log dir doesn't exist, create ${ZEPPELIN_LOG_DIR}" @@ -82,9 +83,4 @@ if [[ ! -d "${ZEPPELIN_PID_DIR}" ]]; then $(mkdir -p "${ZEPPELIN_PID_DIR}") fi -if [[ ! -d "${ZEPPELIN_NOTEBOOK_DIR}" ]]; then - echo "Pid dir doesn't exist, create ${ZEPPELIN_NOTEBOOK_DIR}" - $(mkdir -p "${ZEPPELIN_NOTEBOOK_DIR}") -fi - -exec $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:$CLASSPATH $ZEPPELIN_SERVER "$@" +exec $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:${ZEPPELIN_CLASSPATH} $ZEPPELIN_SERVER "$@" diff --git a/cassandra/pom.xml b/cassandra/pom.xml index 66a18e17203..05108e9fd48 100644 --- a/cassandra/pom.xml +++ b/cassandra/pom.xml @@ -22,14 +22,14 @@ zeppelin org.apache.zeppelin - 0.6.2-SNAPSHOT + 0.8.0-SNAPSHOT .. 
org.apache.zeppelin zeppelin-cassandra_2.10 jar - 0.6.2-SNAPSHOT + 0.8.0-SNAPSHOT Zeppelin: Apache Cassandra interpreter Zeppelin cassandra support @@ -41,11 +41,14 @@ 1.7.1 16.0.1 - - 4.12 + 3.2.4-Zeppelin - 1.7.0 - 1.9.5 + 4.2.0 + + + 2.15.2 + 1.0 + 1.7.1 @@ -116,7 +119,7 @@ ${scalate.version} - + org.scalatest scalatest_${scala.binary.version} @@ -131,6 +134,13 @@ test + + net.java.dev.jna + jna + ${jna.version} + test + + info.archinnov achilles-embedded @@ -145,20 +155,22 @@ ch.qos.logback logback-classic + + net.java.dev.jna + jna + org.mockito mockito-core - ${mockito.version} test org.assertj assertj-core - ${assertj.version} test @@ -169,7 +181,7 @@ org.scala-tools maven-scala-plugin - 2.15.2 + ${plugin.scala.version} compile @@ -197,7 +209,7 @@ org.scalatest scalatest-maven-plugin - 1.0 + ${plugin.scalatest.version} test @@ -211,7 +223,7 @@ org.scalatra.scalate maven-scalate-plugin_${scala.binary.version} - 1.7.1 + ${plugin.scalate.version} compile @@ -229,7 +241,6 @@ maven-enforcer-plugin - 1.3.1 enforce @@ -240,7 +251,6 @@ maven-dependency-plugin - 2.8 copy-dependencies diff --git a/cassandra/src/main/java/org/apache/zeppelin/cassandra/CassandraInterpreter.java b/cassandra/src/main/java/org/apache/zeppelin/cassandra/CassandraInterpreter.java index 147fb62c735..5eb3a03313c 100644 --- a/cassandra/src/main/java/org/apache/zeppelin/cassandra/CassandraInterpreter.java +++ b/cassandra/src/main/java/org/apache/zeppelin/cassandra/CassandraInterpreter.java @@ -151,111 +151,6 @@ public CassandraInterpreter(Properties properties) { super(properties); } - static { - LOGGER.info("Bootstrapping Cassandra Interpreter"); - Interpreter.register("cassandra", "cassandra", CassandraInterpreter.class.getName(), - new InterpreterPropertyBuilder() - .add(CASSANDRA_HOSTS, DEFAULT_HOST, - "Comma separated Cassandra hosts (DNS name or " + - "IP address). Default = localhost. Ex: '192.168.0.12,node2,node3'") - .add(CASSANDRA_PORT, DEFAULT_PORT, "Cassandra native port. Default = 9042") - .add(CASSANDRA_PROTOCOL_VERSION, DEFAULT_PROTOCOL_VERSION, - "Cassandra protocol version. Default = 4") - .add(CASSANDRA_CLUSTER_NAME, DEFAULT_CLUSTER, "Cassandra cluster name. " + - "Default = 'Test Cluster'") - .add(CASSANDRA_KEYSPACE_NAME, DEFAULT_KEYSPACE, "Cassandra keyspace name. " + - "Default = 'system'") - .add(CASSANDRA_COMPRESSION_PROTOCOL, DEFAULT_COMPRESSION, - "Cassandra compression protocol. " + - "Available values: NONE, SNAPPY, LZ4. Default = NONE") - .add(CASSANDRA_CREDENTIALS_USERNAME, DEFAULT_CREDENTIAL, - "Cassandra credentials username. " + - "Default = 'none'") - .add(CASSANDRA_CREDENTIALS_PASSWORD, DEFAULT_CREDENTIAL, - "Cassandra credentials password. " + - "Default = 'none'") - .add(CASSANDRA_LOAD_BALANCING_POLICY, DEFAULT_POLICY, "Cassandra Load Balancing Policy. " + - "Default = new TokenAwarePolicy(new DCAwareRoundRobinPolicy())") - .add(CASSANDRA_RETRY_POLICY, DEFAULT_POLICY, "Cassandra Retry Policy. " + - "Default = DefaultRetryPolicy.INSTANCE") - .add(CASSANDRA_RECONNECTION_POLICY, DEFAULT_POLICY, "Cassandra Reconnection Policy. " + - "Default = new ExponentialReconnectionPolicy(1000, 10 * 60 * 1000)") - .add(CASSANDRA_SPECULATIVE_EXECUTION_POLICY, DEFAULT_POLICY, - "Cassandra Speculative Execution Policy. 
" + - "Default = NoSpeculativeExecutionPolicy.INSTANCE") - .add(CASSANDRA_INTERPRETER_PARALLELISM, DEFAULT_PARALLELISM, - "Cassandra interpreter parallelism" + - ".Default = 10") - .add(CASSANDRA_MAX_SCHEMA_AGREEMENT_WAIT_SECONDS, - DEFAULT_MAX_SCHEMA_AGREEMENT_WAIT_SECONDS + "" - , "Cassandra max schema agreement wait in second" + - ".Default = ProtocolOptions.DEFAULT_MAX_SCHEMA_AGREEMENT_WAIT_SECONDS") - - .add(CASSANDRA_POOLING_NEW_CONNECTION_THRESHOLD_LOCAL, - DEFAULT_NEW_CONNECTION_THRESHOLD_LOCAL, - "Cassandra new connection threshold local. " + - "Protocol V2 and below default = 100" + - "Protocol V3 and above default = 800") - .add(CASSANDRA_POOLING_NEW_CONNECTION_THRESHOLD_REMOTE, - DEFAULT_NEW_CONNECTION_THRESHOLD_REMOTE, - "Cassandra new connection threshold remove. " + - "Protocol V2 and below default = 100" + - "Protocol V3 and above default = 200") - - .add(CASSANDRA_POOLING_CORE_CONNECTION_PER_HOST_LOCAL, - DEFAULT_CORE_CONNECTION_PER_HOST_LOCAL, - "Cassandra core connection per host local. " + - "Protocol V2 and below default = 2" + - "Protocol V3 and above default = 1") - .add(CASSANDRA_POOLING_CORE_CONNECTION_PER_HOST_REMOTE, - DEFAULT_CORE_CONNECTION_PER_HOST_REMOTE, - "Cassandra core connection per host remove. " + - "Protocol V2 and below default = 1" + - "Protocol V3 and above default = 1") - - .add(CASSANDRA_POOLING_MAX_CONNECTION_PER_HOST_LOCAL, - DEFAULT_MAX_CONNECTION_PER_HOST_LOCAL, - "Cassandra max connection per host local. " + - "Protocol V2 and below default = 8" + - "Protocol V3 and above default = 1") - .add(CASSANDRA_POOLING_MAX_CONNECTION_PER_HOST_REMOTE, - DEFAULT_MAX_CONNECTION_PER_HOST_REMOTE, - "Cassandra max connection per host remote. " + - "Protocol V2 and below default = 2" + - "Protocol V3 and above default = 1") - - .add(CASSANDRA_POOLING_MAX_REQUESTS_PER_CONNECTION_LOCAL, - DEFAULT_MAX_REQUEST_PER_CONNECTION_LOCAL, - "Cassandra max request per connection local. " + - "Protocol V2 and below default = 128" + - "Protocol V3 and above default = 1024") - .add(CASSANDRA_POOLING_MAX_REQUESTS_PER_CONNECTION_REMOTE, - DEFAULT_MAX_REQUEST_PER_CONNECTION_REMOTE, - "Cassandra max request per connection remote. " + - "Protocol V2 and below default = 128" + - "Protocol V3 and above default = 256") - - .add(CASSANDRA_POOLING_IDLE_TIMEOUT_SECONDS, DEFAULT_IDLE_TIMEOUT, - "Cassandra idle time out in seconds. Default = 120") - .add(CASSANDRA_POOLING_POOL_TIMEOUT_MILLIS, DEFAULT_POOL_TIMEOUT, - "Cassandra pool time out in millisecs. Default = 5000") - .add(CASSANDRA_POOLING_HEARTBEAT_INTERVAL_SECONDS, DEFAULT_HEARTBEAT_INTERVAL, - "Cassandra pool heartbeat interval in secs. Default = 30") - .add(CASSANDRA_QUERY_DEFAULT_CONSISTENCY, DEFAULT_CONSISTENCY, - "Cassandra query default consistency level. Default = ONE") - .add(CASSANDRA_QUERY_DEFAULT_SERIAL_CONSISTENCY, DEFAULT_SERIAL_CONSISTENCY, - "Cassandra query default serial consistency level. Default = SERIAL") - .add(CASSANDRA_QUERY_DEFAULT_FETCH_SIZE, DEFAULT_FETCH_SIZE, - "Cassandra query default fetch size. Default = 5000") - .add(CASSANDRA_SOCKET_CONNECTION_TIMEOUT_MILLIS, DEFAULT_CONNECTION_TIMEOUT, - "Cassandra socket default connection timeout in millisecs. Default = 5000") - .add(CASSANDRA_SOCKET_READ_TIMEOUT_MILLIS, DEFAULT_READ_TIMEOUT, - "Cassandra socket read timeout in millisecs. Default = 12000") - .add(CASSANDRA_SOCKET_TCP_NO_DELAY, DEFAULT_TCP_NO_DELAY, - "Cassandra socket TCP no delay. 
Default = true") - .build()); - } - @Override public void open() { @@ -321,7 +216,8 @@ public int getProgress(InterpreterContext context) { } @Override - public List completion(String buf, int cursor) { + public List completion(String buf, int cursor, + InterpreterContext interpreterContext) { return NO_COMPLETION; } @@ -331,10 +227,4 @@ public Scheduler getScheduler() { .createOrGetParallelScheduler(CassandraInterpreter.class.getName() + this.hashCode(), parseInt(getProperty(CASSANDRA_INTERPRETER_PARALLELISM))); } - - @Override - public void destroy() { - super.destroy(); - this.close(); - } } diff --git a/cassandra/src/main/resources/interpreter-setting.json b/cassandra/src/main/resources/interpreter-setting.json new file mode 100644 index 00000000000..3df120d985c --- /dev/null +++ b/cassandra/src/main/resources/interpreter-setting.json @@ -0,0 +1,198 @@ +[ + { + "group": "cassandra", + "name": "cassandra", + "className": "org.apache.zeppelin.cassandra.CassandraInterpreter", + "properties": { + "cassandra.hosts": { + "envName": null, + "propertyName": "cassandra.hosts", + "defaultValue": "localhost", + "description": "Comma separated Cassandra hosts (DNS name or IP address). Default = localhost. Ex: '192.168.0.12,node2,node3'" + }, + "cassandra.native.port": { + "envName": null, + "propertyName": "cassandra.native.port", + "defaultValue": "9042", + "description": "Cassandra native port. Default = 9042" + }, + "cassandra.protocol.version": { + "envName": null, + "propertyName": "cassandra.protocol.version", + "defaultValue": "4", + "description": "Cassandra protocol version. Default = 4" + }, + "cassandra.cluster": { + "envName": null, + "propertyName": "cassandra.cluster", + "defaultValue": "Test Cluster", + "description": "Cassandra cluster name. Default = 'Test Cluster'" + }, + "cassandra.keyspace": { + "envName": null, + "propertyName": "cassandra.keyspace", + "defaultValue": "system", + "description": "Cassandra keyspace name. Default = 'system'" + }, + "cassandra.compression.protocol": { + "envName": null, + "propertyName": "cassandra.compression.protocol", + "defaultValue": "NONE", + "description": "Cassandra compression protocol. Available values: NONE, SNAPPY, LZ4. Default = NONE" + }, + "cassandra.credentials.username": { + "envName": null, + "propertyName": "cassandra.credentials.username", + "defaultValue": "none", + "description": "Cassandra credentials username. Default = 'none'" + }, + "cassandra.credentials.password": { + "envName": null, + "propertyName": "cassandra.credentials.password", + "defaultValue": "none", + "description": "Cassandra credentials password. Default = 'none'" + }, + "cassandra.load.balancing.policy": { + "envName": null, + "propertyName": "cassandra.load.balancing.policy", + "defaultValue": "DEFAULT", + "description": "Cassandra Load Balancing Policy. Default = new TokenAwarePolicy(new DCAwareRoundRobinPolicy())" + }, + "cassandra.retry.policy": { + "envName": null, + "propertyName": "cassandra.retry.policy", + "defaultValue": "DEFAULT", + "description": "Cassandra Retry Policy. Default = DefaultRetryPolicy.INSTANCE" + }, + "cassandra.reconnection.policy": { + "envName": null, + "propertyName": "cassandra.reconnection.policy", + "defaultValue": "DEFAULT", + "description": "Cassandra Reconnection Policy. 
Default = new ExponentialReconnectionPolicy(1000, 10 * 60 * 1000)" + }, + "cassandra.speculative.execution.policy": { + "envName": null, + "propertyName": "cassandra.speculative.execution.policy", + "defaultValue": "DEFAULT", + "description": "Cassandra Speculative Execution Policy. Default = NoSpeculativeExecutionPolicy.INSTANCE" + }, + "cassandra.interpreter.parallelism": { + "envName": null, + "propertyName": "cassandra.interpreter.parallelism", + "defaultValue": "10", + "description": "Cassandra interpreter parallelism. Default = 10" + }, + "cassandra.max.schema.agreement.wait.second": { + "envName": null, + "propertyName": "cassandra.max.schema.agreement.wait.second", + "defaultValue": "10", + "description": "Cassandra max schema agreement wait in seconds. Default = ProtocolOptions.DEFAULT_MAX_SCHEMA_AGREEMENT_WAIT_SECONDS" + }, + "cassandra.pooling.new.connection.threshold.local": { + "envName": null, + "propertyName": "cassandra.pooling.new.connection.threshold.local", + "defaultValue": "100", + "description": "Cassandra new connection threshold local. Protocol V2 and below default = 100; Protocol V3 and above default = 800" + }, + "cassandra.pooling.new.connection.threshold.remote": { + "envName": null, + "propertyName": "cassandra.pooling.new.connection.threshold.remote", + "defaultValue": "100", + "description": "Cassandra new connection threshold remote. Protocol V2 and below default = 100; Protocol V3 and above default = 200" + }, + "cassandra.pooling.core.connection.per.host.local": { + "envName": null, + "propertyName": "cassandra.pooling.core.connection.per.host.local", + "defaultValue": "2", + "description": "Cassandra core connection per host local. Protocol V2 and below default = 2; Protocol V3 and above default = 1" + }, + "cassandra.pooling.core.connection.per.host.remote": { + "envName": null, + "propertyName": "cassandra.pooling.core.connection.per.host.remote", + "defaultValue": "1", + "description": "Cassandra core connection per host remote. Protocol V2 and below default = 1; Protocol V3 and above default = 1" + }, + "cassandra.pooling.max.connection.per.host.local": { + "envName": null, + "propertyName": "cassandra.pooling.max.connection.per.host.local", + "defaultValue": "8", + "description": "Cassandra max connection per host local. Protocol V2 and below default = 8; Protocol V3 and above default = 1" + }, + "cassandra.pooling.max.connection.per.host.remote": { + "envName": null, + "propertyName": "cassandra.pooling.max.connection.per.host.remote", + "defaultValue": "2", + "description": "Cassandra max connection per host remote. Protocol V2 and below default = 2; Protocol V3 and above default = 1" + }, + "cassandra.pooling.max.request.per.connection.local": { + "envName": null, + "propertyName": "cassandra.pooling.max.request.per.connection.local", + "defaultValue": "1024", + "description": "Cassandra max request per connection local. Protocol V2 and below default = 128; Protocol V3 and above default = 1024" + }, + "cassandra.pooling.max.request.per.connection.remote": { + "envName": null, + "propertyName": "cassandra.pooling.max.request.per.connection.remote", + "defaultValue": "256", + "description": "Cassandra max request per connection remote. Protocol V2 and below default = 128; Protocol V3 and above default = 256" + }, + "cassandra.pooling.idle.timeout.seconds": { + "envName": null, + "propertyName": "cassandra.pooling.idle.timeout.seconds", + "defaultValue": "120", + "description": "Cassandra idle timeout in seconds. 
Default = 120" + }, + "cassandra.pooling.pool.timeout.millisecs": { + "envName": null, + "propertyName": "cassandra.pooling.pool.timeout.millisecs", + "defaultValue": "5000", + "description": "Cassandra pool timeout in millisecs. Default = 5000" + }, + "cassandra.pooling.heartbeat.interval.seconds": { + "envName": null, + "propertyName": "cassandra.pooling.heartbeat.interval.seconds", + "defaultValue": "30", + "description": "Cassandra pool heartbeat interval in seconds. Default = 30" + }, + "cassandra.query.default.consistency": { + "envName": null, + "propertyName": "cassandra.query.default.consistency", + "defaultValue": "ONE", + "description": "Cassandra query default consistency level. Default = ONE" + }, + "cassandra.query.default.serial.consistency": { + "envName": null, + "propertyName": "cassandra.query.default.serial.consistency", + "defaultValue": "SERIAL", + "description": "Cassandra query default serial consistency level. Default = SERIAL" + }, + "cassandra.query.default.fetchSize": { + "envName": null, + "propertyName": "cassandra.query.default.fetchSize", + "defaultValue": "5000", + "description": "Cassandra query default fetch size. Default = 5000" + }, + "cassandra.socket.connection.timeout.millisecs": { + "envName": null, + "propertyName": "cassandra.socket.connection.timeout.millisecs", + "defaultValue": "5000", + "description": "Cassandra socket default connection timeout in millisecs. Default = 5000" + }, + "cassandra.socket.read.timeout.millisecs": { + "envName": null, + "propertyName": "cassandra.socket.read.timeout.millisecs", + "defaultValue": "12000", + "description": "Cassandra socket read timeout in millisecs. Default = 12000" + }, + "cassandra.socket.tcp.no_delay": { + "envName": null, + "propertyName": "cassandra.socket.tcp.no_delay", + "defaultValue": "true", + "description": "Cassandra socket TCP no delay. 
Default = true" + } + }, + "editor": { + "editOnDblClick": false + } + } +] diff --git a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala index 363da7b1d22..c83a186a9c6 100644 --- a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala +++ b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala @@ -30,7 +30,7 @@ import com.datastax.driver.core.exceptions.DriverException import com.datastax.driver.core.policies.{LoggingRetryPolicy, FallthroughRetryPolicy, DowngradingConsistencyRetryPolicy, Policies} import org.apache.zeppelin.cassandra.TextBlockHierarchy._ import org.apache.zeppelin.display.AngularObjectRegistry -import org.apache.zeppelin.display.Input.ParamOption +import org.apache.zeppelin.display.ui.OptionInput.ParamOption import org.apache.zeppelin.interpreter.InterpreterResult.Code import org.apache.zeppelin.interpreter.{InterpreterException, InterpreterResult, InterpreterContext} import org.slf4j.LoggerFactory diff --git a/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java b/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java index db3c3914683..cf392bb428c 100644 --- a/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java +++ b/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java @@ -143,7 +143,7 @@ public void should_interpret_simple_select() throws Exception { //Then assertThat(actual).isNotNull(); assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" + + assertThat(actual.message().get(0).getData()).isEqualTo("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" + "Bogdan Raczynski\t1977-01-01\tPoland\tnull\tMale\t[Dance, Electro]\tPerson\n" + "Krishna Das\t1947-05-31\tUSA\tnull\tMale\t[Unknown]\tPerson\n" + "Sheryl Crow\t1962-02-11\tUSA\tnull\tFemale\t[Classic, Rock, Country, Blues, Pop, Folk]\tPerson\n" + @@ -166,7 +166,7 @@ public void should_interpret_select_statement() throws Exception { //Then assertThat(actual).isNotNull(); assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" + + assertThat(actual.message().get(0).getData()).isEqualTo("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" + "Bogdan Raczynski\t1977-01-01\tPoland\tnull\tMale\t[Dance, Electro]\tPerson\n" + "Krishna Das\t1947-05-31\tUSA\tnull\tMale\t[Unknown]\tPerson\n"); @@ -191,7 +191,7 @@ public void should_interpret_multiple_statements_with_single_line_logged_batch() //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo("title\tartist\tyear\n" + + assertThat(actual.message().get(0).getData()).isEqualTo("title\tartist\tyear\n" + "The Impossible Dream EP\tCarter the Unstoppable Sex Machine\t1992\n" + "The Way You Are\tTears for Fears\t1983\n" + "Primitive\tSoulfly\t2003\n"); @@ -207,7 +207,7 @@ public void should_throw_statement_not_having_semi_colon() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message()) + assertThat(actual.message().get(0).getData()) .contains("Error parsing input:\n" + "\t'SELECT * zeppelin.albums'\n" + "Did you forget to add ; (semi-colon) at the end of each CQL statement ?"); @@ -223,7 +223,7 @@ public void should_validate_statement() throws 
Exception { //Then assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message()).contains("line 1:9 missing K_FROM at 'zeppelin' (SELECT * [zeppelin]....)"); + assertThat(actual.message().get(0).getData()).contains("line 1:9 missing K_FROM at 'zeppelin' (SELECT * [zeppelin]....)"); } @Test @@ -237,7 +237,7 @@ public void should_execute_statement_with_consistency_option() throws Exception //Then assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message()) + assertThat(actual.message().get(0).getData()) .contains("Not enough replicas available for query at consistency THREE (3 required but only 1 alive)"); } @@ -315,7 +315,7 @@ public void should_execute_prepared_and_bound_statements() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo("key\tval\n" + + assertThat(actual.message().get(0).getData()).isEqualTo("key\tval\n" + "myKey\tmyValue\n"); } @@ -336,7 +336,7 @@ public void should_execute_bound_statement() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo( + assertThat(actual.message().get(0).getData()).isEqualTo( "login\taddresses\tage\tdeceased\tfirstname\tlast_update\tlastname\tlocation\n" + "jdoe\t" + "{street_number:3,street_name:'Beverly Hills Bld',zip_code:90209," + @@ -359,7 +359,7 @@ public void should_exception_when_executing_unknown_bound_statement() throws Exc //Then assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message()) + assertThat(actual.message().get(0).getData()) .isEqualTo("The statement 'select_users' can not be bound to values. " + "Are you sure you did prepare it with @prepare[select_users] ?"); } @@ -381,7 +381,7 @@ public void should_extract_variable_from_statement() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo("firstname\tlastname\tage\n" + + assertThat(actual.message().get(0).getData()).isEqualTo("firstname\tlastname\tage\n" + "Helen\tSUE\t27\n"); } @@ -398,7 +398,7 @@ public void should_just_prepare_statement() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message())).isEqualTo(expected); + assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); } @Test @@ -412,7 +412,7 @@ public void should_execute_bound_statement_with_no_bound_value() throws Exceptio //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo("name\tcountry\tstyles\n" + + assertThat(actual.message().get(0).getData()).isEqualTo("name\tcountry\tstyles\n" + "Bogdan Raczynski\tPoland\t[Dance, Electro]\n" + "Krishna Das\tUSA\t[Unknown]\n" + "Sheryl Crow\tUSA\t[Classic, Rock, Country, Blues, Pop, Folk]\n"); @@ -430,7 +430,7 @@ public void should_parse_date_value() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).contains("last_update\n" + + assertThat(actual.message().get(0).getData()).contains("last_update\n" + "Thu Jul 30 12:00:01"); } @@ -445,7 +445,7 @@ public void should_bind_null_value() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo("firstname\tlastname\n" + + assertThat(actual.message().get(0).getData()).isEqualTo("firstname\tlastname\n" + "null\tNULL\n"); } @@ -460,7 +460,7 @@ public void should_bind_boolean_value() throws Exception { //Then 
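// Note: InterpreterResult.message() now returns a list of result messages, so these assertions read the text payload via message().get(0).getData() rather than treating message() as a single string.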
assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo("login\tdeceased\n" + + assertThat(actual.message().get(0).getData()).isEqualTo("login\tdeceased\n" + "bind_bool\tfalse\n"); } @@ -477,7 +477,7 @@ public void should_fail_when_executing_a_removed_prepared_statement() throws Exc //Then assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message()).isEqualTo("The statement 'to_be_removed' can not be bound to values. " + + assertThat(actual.message().get(0).getData()).isEqualTo("The statement 'to_be_removed' can not be bound to values. " + "Are you sure you did prepare it with @prepare[to_be_removed] ?"); } @@ -500,7 +500,7 @@ public void should_display_statistics_for_non_select_statement() throws Exceptio assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message())).isEqualTo(expected); + assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); } @Test @@ -513,7 +513,7 @@ public void should_error_and_display_stack_trace() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message()).contains("All host(s) tried for query failed"); + assertThat(actual.message().get(0).getData()).contains("All host(s) tried for query failed"); } @Test @@ -530,7 +530,7 @@ public void should_describe_cluster() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message())).isEqualTo(expected); + assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); } @Test @@ -546,7 +546,7 @@ public void should_describe_keyspaces() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message())).isEqualTo(expected); + assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); } @Test @@ -562,7 +562,7 @@ public void should_describe_keyspace() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message())).isEqualTo(expected); + assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); } @Test @@ -648,7 +648,7 @@ public void should_describe_table() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message())).isEqualTo(expected); + assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); } @Test @@ -664,7 +664,7 @@ public void should_describe_udt() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message())).isEqualTo(expected); + assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); } @Test @@ -681,7 +681,7 @@ public void should_describe_udt_withing_logged_in_keyspace() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message())).isEqualTo(expected); + assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); } @Test @@ -695,7 +695,7 @@ public void should_error_describing_non_existing_table() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message()).contains("Cannot find table system.complex_table"); + assertThat(actual.message().get(0).getData()).contains("Cannot find table system.complex_table"); } @Test @@ -709,7 +709,7 @@ public void should_error_describing_non_existing_udt() throws Exception { //Then 
assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message()).contains("Cannot find type system.address"); + assertThat(actual.message().get(0).getData()).contains("Cannot find type system.address"); } @Test @@ -723,7 +723,7 @@ public void should_show_help() throws Exception { //Then assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message())).isEqualTo(expected); + assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); } private static String reformatHtml(String rawHtml) { diff --git a/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java b/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java index 698397ae946..f3848fd1a71 100644 --- a/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java +++ b/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java @@ -34,7 +34,7 @@ import org.apache.zeppelin.display.AngularObjectRegistry; import org.apache.zeppelin.display.GUI; -import org.apache.zeppelin.display.Input.ParamOption; +import org.apache.zeppelin.display.ui.OptionInput.ParamOption; import org.apache.zeppelin.interpreter.InterpreterContext; import org.apache.zeppelin.interpreter.InterpreterException; import org.junit.Rule; diff --git a/conf/README.md b/conf/README.md deleted file mode 100644 index ab32e274569..00000000000 --- a/conf/README.md +++ /dev/null @@ -1,15 +0,0 @@ -## Enabling SSL -Enabling SSL requires a few changes. The first is to set zeppelin.ssl to true. If you'll like to use client side certificate authentication as well, then set zeppelin.ssl.client.auth to true too. - -Information how about to generate certificates and a keystore can be found [here](https://wiki.eclipse.org/Jetty/Howto/Configure_SSL). - -A condensed example can be found in the top answer to this [StackOverflow post](http://stackoverflow.com/questions/4008837/configure-ssl-on-jetty). - -The keystore holds the private key and certificate on the server end. The trustore holds the trusted client certificates. Be sure that the path and password for these two stores are correctly configured in the password fields below. They can be obfuscated using the Jetty password tool. After Maven pulls in all the dependency to build Zeppelin, one of the Jetty jars contain the Password tool. Invoke this command from the Zeppelin home build directory with the appropriate version, user, and password. - -``` -java -cp ./zeppelin-server/target/lib/jetty-all-server-.jar org.eclipse.jetty.util.security.Password -``` - -If you are using a self-signed, a certificate signed by an untrusted CA, or if client authentication is enabled, then the client must have a browser create exceptions for both the normal HTTPS port and WebSocket port. This can by done by trying to establish an HTTPS connection to both ports in a browser (i.e. if the ports are 443 and 8443, then visit https://127.0.0.1:443 and https://127.0.0.1:8443). This step can be skipped if the server certificate is signed by a trusted CA and client auth is disabled. 
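The password obfuscation described above can also be driven from code rather than from the command line, through Jetty's `Password` utility. A minimal sketch, assuming jetty-util is on the classpath (the class name and sample password are illustrative):

```
import org.eclipse.jetty.util.security.Password;

public class ObfuscatePassword {
  public static void main(String[] args) {
    // Produces an OBF:-prefixed string; obfuscation only keeps the clear text
    // out of config files -- it is reversible and is not encryption.
    String obf = Password.obfuscate("changeit");
    System.out.println(obf);
    // Round-trips back to the original password.
    System.out.println(Password.deobfuscate(obf));
  }
}
```

As the text above notes, the resulting OBF: string can then be placed in the keystore and truststore password fields in place of the clear-text value.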
- diff --git a/conf/interpreter-list b/conf/interpreter-list index 17a6f1e4a36..9506122f89b 100644 --- a/conf/interpreter-list +++ b/conf/interpreter-list @@ -17,20 +17,22 @@ # # [name] [maven artifact] [description] -alluxio org.apache.zeppelin:zeppelin-alluxio:0.6.1 Alluxio interpreter -angular org.apache.zeppelin:zeppelin-angular:0.6.1 HTML and AngularJS view rendering -bigquery org.apache.zeppelin:zeppelin-bigquery:0.6.1 BigQuery interpreter -cassandra org.apache.zeppelin:zeppelin-cassandra_2.11:0.6.1 Cassandra interpreter built with Scala 2.11 -elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.6.1 Elasticsearch interpreter -file org.apache.zeppelin:zeppelin-file:0.6.1 HDFS file interpreter -flink org.apache.zeppelin:zeppelin-flink_2.11:0.6.1 Flink interpreter built with Scala 2.11 -hbase org.apache.zeppelin:zeppelin-hbase:0.6.1 Hbase interpreter -ignite org.apache.zeppelin:zeppelin-ignite_2.11:0.6.1 Ignite interpreter built with Scala 2.11 -jdbc org.apache.zeppelin:zeppelin-jdbc:0.6.1 Jdbc interpreter -kylin org.apache.zeppelin:zeppelin-kylin:0.6.1 Kylin interpreter -lens org.apache.zeppelin:zeppelin-lens:0.6.1 Lens interpreter -livy org.apache.zeppelin:zeppelin-livy:0.6.1 Livy interpreter -md org.apache.zeppelin:zeppelin-markdown:0.6.1 Markdown support -postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.1 Postgresql interpreter -python org.apache.zeppelin:zeppelin-python:0.6.1 Python interpreter -shell org.apache.zeppelin:zeppelin-shell:0.6.1 Shell command +alluxio org.apache.zeppelin:zeppelin-alluxio:0.7.0 Alluxio interpreter +angular org.apache.zeppelin:zeppelin-angular:0.7.0 HTML and AngularJS view rendering +beam org.apache.zeppelin:zeppelin-beam:0.7.0 Beam interpreter +bigquery org.apache.zeppelin:zeppelin-bigquery:0.7.0 BigQuery interpreter +cassandra org.apache.zeppelin:zeppelin-cassandra_2.11:0.7.0 Cassandra interpreter built with Scala 2.11 +elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.7.0 Elasticsearch interpreter +file org.apache.zeppelin:zeppelin-file:0.7.0 HDFS file interpreter +flink org.apache.zeppelin:zeppelin-flink_2.11:0.7.0 Flink interpreter built with Scala 2.11 +hbase org.apache.zeppelin:zeppelin-hbase:0.7.0 Hbase interpreter +ignite org.apache.zeppelin:zeppelin-ignite_2.11:0.7.0 Ignite interpreter built with Scala 2.11 +jdbc org.apache.zeppelin:zeppelin-jdbc:0.7.0 Jdbc interpreter +kylin org.apache.zeppelin:zeppelin-kylin:0.7.0 Kylin interpreter +lens org.apache.zeppelin:zeppelin-lens:0.7.0 Lens interpreter +livy org.apache.zeppelin:zeppelin-livy:0.7.0 Livy interpreter +md org.apache.zeppelin:zeppelin-markdown:0.7.0 Markdown support +pig org.apache.zeppelin:zeppelin-pig:0.7.0 Pig interpreter +python org.apache.zeppelin:zeppelin-python:0.7.0 Python interpreter +scio org.apache.zeppelin:zeppelin-scio_2.11:0.7.0 Scio interpreter +shell org.apache.zeppelin:zeppelin-shell:0.7.0 Shell command diff --git a/conf/shiro.ini b/conf/shiro.ini.template similarity index 84% rename from conf/shiro.ini rename to conf/shiro.ini.template index 66190c0cb93..8f40d23bba9 100644 --- a/conf/shiro.ini +++ b/conf/shiro.ini.template @@ -18,7 +18,7 @@ [users] # List of users with their password allowed to access Zeppelin. # To use a different strategy (LDAP / Database / ...) 
check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections -admin = password1 +admin = password1, admin user1 = password2, role1, role2 user2 = password3, role3 user3 = password4, role2 @@ -26,7 +26,7 @@ user3 = password4, role2 # Sample LDAP configuration, for user Authentication, currently tested for single Realm [main] ### A sample for configuring Active Directory Realm -#activeDirectoryRealm = org.apache.zeppelin.server.ActiveDirectoryGroupRealm +#activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm #activeDirectoryRealm.systemUsername = userNameA #use either systemPassword or hadoopSecurityCredentialPath, more details in http://zeppelin.apache.org/docs/latest/security/shiroauthentication.html @@ -38,13 +38,17 @@ user3 = password4, role2 #activeDirectoryRealm.authorizationCachingEnabled = false ### A sample for configuring LDAP Directory Realm -#ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm ## search base for ldap groups (only relevant for LdapGroupRealm): #ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM #ldapRealm.contextFactory.url = ldap://ldap.test.com:389 #ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM #ldapRealm.contextFactory.authenticationMechanism = SIMPLE +### A sample PAM configuration +#pamRealm=org.apache.zeppelin.realm.PamRealm +#pamRealm.service=sshd + ### A sample for configuring ZeppelinHub Realm #zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm ## Url of ZeppelinHub @@ -66,12 +70,17 @@ shiro.loginUrl = /api/login role1 = * role2 = * role3 = * +admin = * [urls] +# This section is used for URL-based security. +# You can secure interpreter, configuration and credential information by URL. Comment or uncomment the URLs below that you want to hide. # anon means the access is anonymous. -# authcBasic means Basic Auth Security # authc means Form based Auth Security # To enforce security, comment the line below and uncomment the next one /api/version = anon -/** = anon -#/** = authc +#/api/interpreter/** = authc, roles[admin] +#/api/configurations/** = authc, roles[admin] +#/api/credential/** = authc, roles[admin] +#/** = anon +/** = authc diff --git a/conf/zeppelin-env.cmd.template b/conf/zeppelin-env.cmd.template index 9c7f7f77ef5..cd1541e72c4 100644 --- a/conf/zeppelin-env.cmd.template +++ b/conf/zeppelin-env.cmd.template @@ -19,8 +19,8 @@ REM REM set JAVA_HOME= REM set MASTER= REM Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode. REM set ZEPPELIN_JAVA_OPTS REM Additional jvm options. for example, set ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16" -REM set ZEPPELIN_MEM REM Zeppelin jvm mem options Default -Xmx1024m -XX:MaxPermSize=512m -REM set ZEPPELIN_INTP_MEM REM zeppelin interpreter process jvm mem options. +REM set ZEPPELIN_MEM REM Zeppelin jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m +REM set ZEPPELIN_INTP_MEM REM zeppelin interpreter process jvm mem options. Default -Xmx1024m -Xms1024m -XX:MaxPermSize=512m REM set ZEPPELIN_INTP_JAVA_OPTS REM zeppelin interpreter process jvm options. REM set ZEPPELIN_LOG_DIR REM Where log files are stored. PWD by default. @@ -31,10 +31,17 @@ REM set ZEPPELIN_NOTEBOOK_HOMESCREEN REM Id of notebook to be displayed in home REM set ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE REM hide homescreen notebook from list when this value is set to "true". 
default "false" REM set ZEPPELIN_NOTEBOOK_S3_BUCKET REM Bucket where notebook saved REM set ZEPPELIN_NOTEBOOK_S3_USER REM User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json +REM set ZEPPELIN_NOTEBOOK_S3_ENDPOINT REM Endpoint of the bucket +REM set ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID REM AWS KMS key ID +REM set ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION REM AWS KMS key region +REM set ZEPPELIN_NOTEBOOK_S3_SSE REM Server-side encryption enabled for notebooks REM set ZEPPELIN_IDENT_STRING REM A string representing this instance of zeppelin. $USER by default. REM set ZEPPELIN_NICENESS REM The scheduling priority for daemons. Defaults to 0. REM set ZEPPELIN_INTERPRETER_LOCALREPO REM Local repository for interpreter's additional dependency loading +REM set ZEPPELIN_INTERPRETER_DEP_MVNREPO REM Maven principal repository for interpreter's additional dependency loading +REM set ZEPPELIN_HELIUM_NPM_REGISTRY REM Remote Npm registry for Helium dependency loader REM set ZEPPELIN_NOTEBOOK_STORAGE REM Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote). +REM set ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC REM If there are multiple notebook storages, should we treat the first one as the only source of truth? REM Spark interpreter configuration @@ -62,7 +69,7 @@ REM REM set ZEPPELIN_SPARK_USEHIVECONTEXT REM Use HiveContext instead of SQLContext if set true. true by default. REM set ZEPPELIN_SPARK_CONCURRENTSQL REM Execute multiple SQL concurrently if set true. false by default. REM set ZEPPELIN_SPARK_IMPORTIMPLICIT REM Import implicits, UDF collection, and sql if set true. true by default. -REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of SparkSQL result to display. 1000 by default. +REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of Spark SQL results to display. 1000 by default. REM ZeppelinHub connection configuration REM diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template index 3d12560c896..754c8ef349e 100644 --- a/conf/zeppelin-env.sh.template +++ b/conf/zeppelin-env.sh.template @@ -19,9 +19,10 @@ # export JAVA_HOME= # export MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode. # export ZEPPELIN_JAVA_OPTS # Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16" -# export ZEPPELIN_MEM # Zeppelin jvm mem options Default -Xmx1024m -XX:MaxPermSize=512m -# export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options. +# export ZEPPELIN_MEM # Zeppelin jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m +# export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m # export ZEPPELIN_INTP_JAVA_OPTS # zeppelin interpreter process jvm options. +# export ZEPPELIN_SSL_PORT # ssl port (used when ssl environment variable is set to true) # export ZEPPELIN_LOG_DIR # Where log files are stored. PWD by default. # export ZEPPELIN_PID_DIR # The pid files are stored. ${ZEPPELIN_HOME}/run by default. @@ -32,10 +33,21 @@ # export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved # export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket # export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. 
diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template
index 3d12560c896..754c8ef349e 100644
--- a/conf/zeppelin-env.sh.template
+++ b/conf/zeppelin-env.sh.template
@@ -19,9 +19,10 @@
 # export JAVA_HOME=
 # export MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
 # export ZEPPELIN_JAVA_OPTS # Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
-# export ZEPPELIN_MEM # Zeppelin jvm mem options Default -Xmx1024m -XX:MaxPermSize=512m
-# export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options.
+# export ZEPPELIN_MEM # Zeppelin jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
+# export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
 # export ZEPPELIN_INTP_JAVA_OPTS # zeppelin interpreter process jvm options.
+# export ZEPPELIN_SSL_PORT # ssl port (used when ssl environment variable is set to true)

 # export ZEPPELIN_LOG_DIR # Where log files are stored. PWD by default.
 # export ZEPPELIN_PID_DIR # The pid files are stored. ${ZEPPELIN_HOME}/run by default.
@@ -32,10 +33,21 @@
 # export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
 # export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket
 # export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
+# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID
+# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region
+# export ZEPPELIN_NOTEBOOK_S3_SSE # Server-side encryption enabled for notebooks
+# export ZEPPELIN_NOTEBOOK_MONGO_URI # MongoDB connection URI used to connect to a MongoDB database server. Default "mongodb://localhost"
+# export ZEPPELIN_NOTEBOOK_MONGO_DATABASE # Database name to store notebook. Default "zeppelin"
+# export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION # Collection name to store notebook. Default "notes"
+# export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT # If "true" import local notes under ZEPPELIN_NOTEBOOK_DIR on startup. Default "false"
 # export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default.
 # export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0.
 # export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading
+# export ZEPPELIN_INTERPRETER_DEP_MVNREPO # Principal remote repository for interpreter's additional dependency loading
+# export ZEPPELIN_HELIUM_NPM_REGISTRY # Remote Npm registry for Helium dependency loader
 # export ZEPPELIN_NOTEBOOK_STORAGE # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
+# export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC # If there are multiple notebook storages, should we treat the first one as the only source of truth?
+# export ZEPPELIN_NOTEBOOK_PUBLIC # Make notebook public by default when created, private otherwise

 #### Spark interpreter configuration ####
@@ -62,7 +74,7 @@
 # export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default.
 # export ZEPPELIN_SPARK_CONCURRENTSQL # Execute multiple SQL concurrently if set true. false by default.
 # export ZEPPELIN_SPARK_IMPORTIMPLICIT # Import implicits, UDF collection, and sql if set true. true by default.
-# export ZEPPELIN_SPARK_MAXRESULT # Max number of SparkSQL result to display. 1000 by default.
+# export ZEPPELIN_SPARK_MAXRESULT # Max number of Spark SQL results to display. 1000 by default.

 # export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000
@@ -77,3 +89,7 @@
 # export ZEPPELINHUB_API_ADDRESS # Refers to the address of the ZeppelinHub service in use
 # export ZEPPELINHUB_API_TOKEN # Refers to the Zeppelin instance token of the user
 # export ZEPPELINHUB_USER_KEY # Optional, when using Zeppelin with authentication.
+
+#### Zeppelin impersonation configuration
+# export ZEPPELIN_IMPERSONATE_CMD # Optional, when a user wants to run the interpreter as the end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
+# export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER # Optional, true by default; can be set to false if you don't want to use the --proxy-user option with the Spark interpreter when impersonation is enabled
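To make the new template entries concrete, here is a hypothetical `zeppelin-env.sh` fragment built only from the variables documented above; the values are illustrative, not defaults:

```
# Hypothetical zeppelin-env.sh; every variable comes from the template above.
export ZEPPELIN_MEM="-Xms1024m -Xmx1024m -XX:MaxPermSize=512m"

# Store notes in MongoDB and import any local notes once at startup.
export ZEPPELIN_NOTEBOOK_MONGO_URI="mongodb://localhost"
export ZEPPELIN_NOTEBOOK_MONGO_DATABASE="zeppelin"
export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT="true"

# Run interpreter processes as the logged-in web user.
export ZEPPELIN_IMPERSONATE_CMD='sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER="true"
```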
diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template
index 8c2cac7f82d..2a03cd9d465 100755
--- a/conf/zeppelin-site.xml.template
+++ b/conf/zeppelin-site.xml.template
@@ -31,6 +31,12 @@
   <description>Server port.</description>
 </property>

+<property>
+  <name>zeppelin.server.ssl.port</name>
+  <value>8443</value>
+  <description>Server ssl port. (used when ssl property is set to true)</description>
+</property>
+
 <property>
   <name>zeppelin.server.context.path</name>
   <value>/</value>
@@ -102,6 +108,16 @@
 -->
+<!-- [a commented-out sample block added here was garbled during extraction and could not be recovered] -->
@@ -113,6 +129,14 @@
 -->
+<!-- [a commented-out sample block changed here was garbled during extraction and could not be recovered] -->
@@ -153,17 +177,56 @@
+<!-- [commented-out notebook-storage samples garbled during extraction] -->
+
+<property>
+  <name>zeppelin.notebook.storage</name>
+  <value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value>
+  <description>versioned notebook persistence layer implementation</description>
+</property>
+
+<property>
+  <name>zeppelin.notebook.one.way.sync</name>
+  <value>false</value>
+  <description>If there are multiple notebook storages, should we treat the first one as the only source of truth?</description>
+</property>
+
 <property>
   <name>zeppelin.interpreter.dir</name>
   <value>interpreter</value>
@@ -176,15 +239,33 @@
   <description>Local repository for interpreter's additional dependency loading</description>
 </property>

+<property>
+  <name>zeppelin.interpreter.dep.mvnRepo</name>
+  <value>http://repo1.maven.org/maven2/</value>
+  <description>Remote principal repository for interpreter's additional dependency loading</description>
+</property>
+
+<property>
+  <name>zeppelin.dep.localrepo</name>
+  <value>local-repo</value>
+  <description>Local repository for dependency loader</description>
+</property>
+
+<property>
+  <name>zeppelin.helium.npm.registry</name>
+  <value>http://registry.npmjs.org/</value>
+  <description>Remote Npm registry for Helium dependency loader</description>
+</property>
+
 <property>
   <name>zeppelin.interpreters</name>
-  <value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter</value>
+  <value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.python.PythonInterpreterPandasSql,org.apache.zeppelin.python.PythonCondaInterpreter,org.apache.zeppelin.python.PythonDockerInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivyPySpark3Interpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter,org.apache.zeppelin.pig.PigQueryInterpreter,org.apache.zeppelin.scio.ScioInterpreter,org.apache.zeppelin.groovy.GroovyInterpreter</value>
   <description>Comma separated interpreter configurations. First interpreter becomes the default</description>
 </property>

 <property>
   <name>zeppelin.interpreter.group.order</name>
-  <value>spark,md,angular,sh,livy,alluxio,file,psql,flink,python,ignite,lens,cassandra,geode,kylin,elasticsearch,scalding,jdbc,hbase,bigquery</value>
+  <value>spark,md,angular,sh,livy,alluxio,file,psql,flink,python,ignite,lens,cassandra,geode,kylin,elasticsearch,scalding,jdbc,hbase,bigquery,beam,groovy</value>
 </property>
@@ -194,6 +275,11 @@
   <description>Interpreter process connect timeout in msec.</description>
 </property>

+<property>
+  <name>zeppelin.interpreter.output.limit</name>
+  <value>102400</value>
+  <description>Output message from interpreter exceeding the limit will be truncated</description>
+</property>
+
 <property>
   <name>zeppelin.ssl</name>
@@ -265,11 +351,30 @@
   <description>Anonymous user allowed by default</description>
 </property>

+<property>
+  <name>zeppelin.notebook.public</name>
+  <value>true</value>
+  <description>Make notebook public by default when created, private otherwise</description>
+</property>
+
 <property>
   <name>zeppelin.websocket.max.text.message.size</name>
   <value>1024000</value>
   <description>Size in characters of the maximum text message to be received by websocket. Defaults to 1024000</description>
 </property>

+<property>
+  <name>zeppelin.server.default.dir.allowed</name>
+  <value>false</value>
+  <description>Enable directory listings on server.</description>
+</property>
+
+<!-- [a trailing commented-out sample block was garbled during extraction; the XML property markup in this file's diff has been reconstructed from the surviving names, values and descriptions] -->
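The template only takes effect once copied into place. A minimal sketch of activating it and spot-checking two of the new properties (paths assume a checkout at `$ZEPPELIN_HOME`):

```
cd $ZEPPELIN_HOME
cp conf/zeppelin-site.xml.template conf/zeppelin-site.xml
# Confirm the new defaults before starting the server.
grep -A 2 'zeppelin.server.ssl.port' conf/zeppelin-site.xml
grep -A 2 'zeppelin.notebook.public' conf/zeppelin-site.xml
```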
diff --git a/dev/change_scala_version.sh b/dev/change_scala_version.sh
index cb2c842e683..0ccfe7e263f 100755
--- a/dev/change_scala_version.sh
+++ b/dev/change_scala_version.sh
@@ -34,7 +34,7 @@ if [[ ($# -ne 1) || ( $1 == "--help") || $1 == "-h" ]]; then
   usage
 fi

-TO_VERSION=$1
+TO_VERSION="$1"

 check_scala_version() {
   for i in ${VALID_VERSIONS[*]}; do [ $i = "$1" ] && return 0; done
@@ -42,12 +42,14 @@ check_scala_version() {
   exit 1
 }

-check_scala_version "$TO_VERSION"
+check_scala_version "${TO_VERSION}"

-if [ $TO_VERSION = "2.11" ]; then
+if [ "${TO_VERSION}" = "2.11" ]; then
   FROM_VERSION="2.10"
+  SCALA_LIB_VERSION="2.11.7"
 else
   FROM_VERSION="2.11"
+  SCALA_LIB_VERSION="2.10.5"
 fi

 sed_i() {
@@ -57,11 +59,17 @@ sed_i() {
 export -f sed_i

 BASEDIR=$(dirname $0)/..
-find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \
-  -exec bash -c "sed_i 's/\(artifactId.*\)_'$FROM_VERSION'/\1_'$TO_VERSION'/g' {}" \;
+find "${BASEDIR}" -name 'pom.xml' -not -path '*target*' -print \
+  -exec bash -c "sed_i 's/\(artifactId.*\)_'${FROM_VERSION}'/\1_'${TO_VERSION}'/g' {}" \;

-# Also update <scala.binary.version> in parent POM
+# Update <scala.binary.version> in parent POM
 # Match any scala binary version to ensure idempotency
-sed_i '1,/<scala\.binary\.version>[0-9]*\.[0-9]*</s/<scala\.binary\.version>[0-9]*\.[0-9]*</<scala.binary.version>'$TO_VERSION'</' "$BASEDIR/pom.xml"
+sed_i '1,/<scala\.binary\.version>[0-9]*\.[0-9]*</s/<scala\.binary\.version>[0-9]*\.[0-9]*</<scala.binary.version>'${TO_VERSION}'</' "${BASEDIR}/pom.xml"
+
+# Update <scala.version> in parent POM.
+# This is to make variables in the leaf pom be substituted with real values when the flattened pom is created.
+# The maven-flatten plugin doesn't take properties defined under a profile even if scala-2.11/scala-2.10 is
+# activated via -Pscala-2.11/-Pscala-2.10, and uses the default defined properties to create the flattened pom.
+sed_i '1,/<scala\.version>[0-9]*\.[0-9]*\.[0-9]*</s/<scala\.version>[0-9]*\.[0-9]*\.[0-9]*</<scala.version>'${SCALA_LIB_VERSION}'</' "${BASEDIR}/pom.xml"

[the sed patterns above are reconstructed from garbled fragments; the span that followed, including the rest of this file's diff and the diff header of dev/change_zeppelin_version.sh, was lost during extraction]
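As a usage sketch, switching the whole build to Scala 2.11 looks like this; `create_release.sh` further down drives the same script through `${SCALA_VERSION}`, and the mvn flags here are illustrative:

```
# Rewrite artifactIds, <scala.binary.version> and <scala.version> in every pom.
./dev/change_scala_version.sh 2.11
# Then build against the matching profile.
mvn clean package -Pscala-2.11 -DskipTests
```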
-find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \ - -exec bash -c "sed_i 's/\(artifactId.*\)_'$FROM_VERSION'/\1_'$TO_VERSION'/g' {}" \; +find "${BASEDIR}" -name 'pom.xml' -not -path '*target*' -print \ + -exec bash -c "sed_i 's/\(artifactId.*\)_'${FROM_VERSION}'/\1_'${TO_VERSION}'/g' {}" \; -# Also update in parent POM +# update in parent POM # Match any scala binary version to ensure idempotency -sed_i '1,/[0-9]*\.[0-9]*[0-9]*\.[0-9]*'$TO_VERSION'[0-9]*\.[0-9]*[0-9]*\.[0-9]*'${TO_VERSION}' in parent POM +# This is to make variables in leaf pom to be substituted to real value when flattened-pom is created. +# maven-flatten plugin doesn't take properties defined under profile even if scala-2.11/scala-2.10 is activated via -Pscala-2.11/-Pscala-2.10, +# and use default defined properties to create flatten pom. +sed_i '1,/[0-9]*\.[0-9]*\.[0-9]*[0-9]*\.[0-9]*\.[0-9]*'${SCALA_LIB_VERSION}' /dev/null 2>&1 + +# Change version in example and package files +sed -i '' 's/-'"${FROM_VERSION}"'.jar",/-'"${TO_VERSION}"'.jar",/g' zeppelin-examples/zeppelin-example-clock/zeppelin-example-clock.json +sed -i '' 's/"version": "'"${FROM_VERSION}"'",/"version": "'"${TO_VERSION}"'",/g' zeppelin-web/src/app/tabledata/package.json +sed -i '' 's/"version": "'"${FROM_VERSION}"'",/"version": "'"${TO_VERSION}"'",/g' zeppelin-web/src/app/visualization/package.json + +# Change version in Dockerfile +sed -i '' 's/Z_VERSION="'"${FROM_VERSION}"'"/Z_VERSION="'"${TO_VERSION}"'"/g' scripts/docker/zeppelin/bin/Dockerfile + +# When preparing new dev version from release tag, doesn't need to change docs version +if is_dev_version "${FROM_VERSION}" || ! is_dev_version "${TO_VERSION}"; then + # When prepare new rc for the maintenance release + if is_dev_version "${FROM_VERSION}" && is_maintenance_version "${TO_VERSION}" \ + && [[ "${FROM_VERSION}" == "${TO_VERSION}"* ]]; then + FROM_VERSION=$(echo "${TO_VERSION}" | awk -F. '{ printf("%d.%d.%d", $1, $2, $3-1) }') + fi + + # Change zeppelin version in docs config + sed -i '' 's/ZEPPELIN_VERSION : '"${FROM_VERSION}"'$/ZEPPELIN_VERSION : '"$TO_VERSION"'/g' docs/_config.yml + sed -i '' 's/BASE_PATH : \/docs\/'"${FROM_VERSION}"'$/BASE_PATH : \/docs\/'"$TO_VERSION"'/g' docs/_config.yml + + # Change interpreter's maven version in docs and interpreter-list + sed -i '' 's/:'"${FROM_VERSION}"'/:'"${TO_VERSION}"'/g' conf/interpreter-list + sed -i '' 's/:'"${FROM_VERSION}"'/:'"${TO_VERSION}"'/g' docs/manual/interpreterinstallation.md +fi diff --git a/dev/common_release.sh b/dev/common_release.sh index df9c33db00f..6b7e901b130 100644 --- a/dev/common_release.sh +++ b/dev/common_release.sh @@ -33,6 +33,9 @@ fi mkdir "${WORKING_DIR}" +# If set to 'yes', release script will deploy artifacts to SNAPSHOT repository. +DO_SNAPSHOT='no' + usage() { echo "usage) $0 [Release version] [Branch or Tag]" echo " ex. 
$0 0.6.0 v0.6.0" diff --git a/dev/create_release.sh b/dev/create_release.sh index 272713baa6b..9cb61e0351c 100755 --- a/dev/create_release.sh +++ b/dev/create_release.sh @@ -33,13 +33,16 @@ if [[ $# -ne 2 ]]; then usage fi -if [[ -z "${GPG_PASSPHRASE}" ]]; then - echo "You need GPG_PASSPHRASE variable set" - exit 1 -fi +for var in GPG_PASSPHRASE; do + if [[ -z "${!var}" ]]; then + echo "You need ${var} variable set" + exit 1 + fi +done RELEASE_VERSION="$1" GIT_TAG="$2" +SCALA_VERSION="2.11" function make_source_package() { # create source package @@ -66,7 +69,7 @@ function make_binary_release() { cp -r "${WORKING_DIR}/zeppelin" "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}" cd "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}" - ./dev/change_scala_version.sh 2.11 + ./dev/change_scala_version.sh "${SCALA_VERSION}" echo "mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}" mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS} if [[ $? -ne 0 ]]; then @@ -103,8 +106,8 @@ function make_binary_release() { git_clone make_source_package -make_binary_release all "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11" -make_binary_release netinst "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11 -pl !alluxio,!angular,!cassandra,!elasticsearch,!file,!flink,!hbase,!ignite,!jdbc,!kylin,!lens,!livy,!markdown,!postgresql,!python,!shell,!bigquery" +make_binary_release all "-Pspark-2.1 -Phadoop-2.6 -Pscala-${SCALA_VERSION}" +make_binary_release netinst "-Pspark-2.1 -Phadoop-2.6 -Pscala-${SCALA_VERSION} -pl zeppelin-interpreter,zeppelin-zengine,:zeppelin-display_${SCALA_VERSION},:zeppelin-spark-dependencies_${SCALA_VERSION},:zeppelin-spark_${SCALA_VERSION},zeppelin-web,zeppelin-server,zeppelin-distribution -am" # remove non release files and dirs rm -rf "${WORKING_DIR}/zeppelin" diff --git a/dev/publish_release.sh b/dev/publish_release.sh index 3a0a0f52f42..b569ec4ba92 100755 --- a/dev/publish_release.sh +++ b/dev/publish_release.sh @@ -1,5 +1,4 @@ #!/bin/bash - # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -43,8 +42,11 @@ NC='\033[0m' # No Color RELEASE_VERSION="$1" GIT_TAG="$2" +if [[ $RELEASE_VERSION == *"SNAPSHOT"* ]]; then + DO_SNAPSHOT="yes" +fi -PUBLISH_PROFILES="-Pbuild-distr -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr" +PUBLISH_PROFILES="-Ppublish-distr -Pspark-2.1 -Phadoop-2.6 -Pr" PROJECT_OPTIONS="-pl !zeppelin-distribution" NEXUS_STAGING="https://repository.apache.org/service/local/staging" NEXUS_PROFILE="153446d1ac37c4" @@ -67,6 +69,40 @@ function curl_error() { fi } + +# +# Publishing Apache Zeppelin artifact to Apache snapshot repository. +# +function publish_snapshot_to_maven() { + cd "${WORKING_DIR}/zeppelin" + echo "Deploying Apache Zeppelin $RELEASE_VERSION version to snapshot repository." + + if [[ ! 
$RELEASE_VERSION == *"SNAPSHOT"* ]]; then
+    echo "ERROR: Snapshots must have a version containing 'SNAPSHOT'"
+    echo "ERROR: You gave version '$RELEASE_VERSION'"
+    exit 1
+  fi
+
+  tmp_repo="$(mktemp -d /tmp/zeppelin-repo-XXXXX)"
+
+  mvn versions:set -DnewVersion=$RELEASE_VERSION
+  tmp_settings="tmp-settings.xml"
+  echo "<settings><servers><server>" > $tmp_settings
+  echo "<id>apache.snapshots.https</id><username>$ASF_USERID</username>" >> $tmp_settings
+  echo "<password>$ASF_PASSWORD</password>" >> $tmp_settings
+  echo "</server></servers></settings>" >> $tmp_settings
+
+  mvn --settings $tmp_settings -Dmaven.repo.local="${tmp_repo}" -Pbeam -DskipTests \
+    $PUBLISH_PROFILES -Drat.skip=true deploy
+
+  "${BASEDIR}/change_scala_version.sh" 2.11
+  mvn -Pscala-2.11 --settings $tmp_settings -Dmaven.repo.local="${tmp_repo}" -Pbeam -DskipTests \
+    $PUBLISH_PROFILES -Drat.skip=true clean deploy
+
+  rm $tmp_settings
+  rm -rf $tmp_repo
+}
+
 function publish_to_maven() {
   cd "${WORKING_DIR}/zeppelin"
@@ -94,9 +130,9 @@
   # build with scala-2.10
   echo "mvn clean install -DskipTests \
-    -Dmaven.repo.local=${tmp_repo} -Pscala-2.10 \
+    -Dmaven.repo.local=${tmp_repo} -Pscala-2.10 -Pbeam \
     ${PUBLISH_PROFILES} ${PROJECT_OPTIONS}"
-  mvn clean install -DskipTests -Dmaven.repo.local="${tmp_repo}" -Pscala-2.10 \
+  mvn clean install -DskipTests -Dmaven.repo.local="${tmp_repo}" -Pscala-2.10 -Pbeam \
     ${PUBLISH_PROFILES} ${PROJECT_OPTIONS}
   if [[ $? -ne 0 ]]; then
     echo "Build with scala 2.10 failed."
@@ -153,5 +189,9 @@
 }

 git_clone
-publish_to_maven
+if [[ "${DO_SNAPSHOT}" == 'yes' ]]; then
+  publish_snapshot_to_maven
+else
+  publish_to_maven
+fi
 cleanup
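Hypothetical invocations of the publishing flow above; the credentials are placeholders and the settings.xml echoes in `publish_snapshot_to_maven` are reconstructed from garbled text. A release version containing `SNAPSHOT` is what flips `DO_SNAPSHOT` to `'yes'` and routes the run to `publish_snapshot_to_maven` instead of the staging path:

```
# Placeholder credentials; the snapshot path needs the ASF pair,
# signing needs the GPG passphrase.
export ASF_USERID="someuser" ASF_PASSWORD="secret" GPG_PASSPHRASE="secret"

./dev/publish_release.sh 0.8.0-SNAPSHOT master   # snapshot deploy
./dev/publish_release.sh 0.7.1 v0.7.1-rc1        # staged release candidate
```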
diff --git a/dev/tag_release.sh b/dev/tag_release.sh
new file mode 100755
index 00000000000..14096ae64cf
--- /dev/null
+++ b/dev/tag_release.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+WORKING_DIR="/tmp/apache-zeppelin"
+
+for var in CURRENT_VERSION RELEASE_VERSION NEXT_DEV_VERSION RC_TAG GIT_BRANCH; do
+  if [[ -z "${!var}" ]]; then
+    echo "You need ${var} variable set"
+    exit 1
+  fi
+done
+
+set -e
+
+git clone https://git-wip-us.apache.org/repos/asf/zeppelin.git "${WORKING_DIR}"
+pushd "${WORKING_DIR}"
+
+git checkout "${GIT_BRANCH}"
+
+# Create release version
+./dev/change_zeppelin_version.sh "${CURRENT_VERSION}" "${RELEASE_VERSION}"
+git commit -a -m "Preparing Apache Zeppelin release ${RELEASE_VERSION}"
+echo "Creating tag ${RC_TAG} at the head of ${GIT_BRANCH}"
+git tag "${RC_TAG}"
+
+# Create next dev version
+./dev/change_zeppelin_version.sh "${RELEASE_VERSION}" "${NEXT_DEV_VERSION}"
+git commit -a -m "Preparing development version ${NEXT_DEV_VERSION}"
+
+git push origin "${RC_TAG}"
+git push origin HEAD:"${GIT_BRANCH}"
+
+popd
+rm -rf "${WORKING_DIR}"
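A hypothetical run of the new tagging script; all five variables are required by the guard loop at its top, and the version strings here are placeholders:

```
CURRENT_VERSION="0.8.0-SNAPSHOT" \
RELEASE_VERSION="0.8.0" \
NEXT_DEV_VERSION="0.8.1-SNAPSHOT" \
RC_TAG="v0.8.0-rc1" \
GIT_BRANCH="branch-0.8" \
./dev/tag_release.sh
```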
diff --git a/dev/test_zeppelin_pr.py b/dev/test_zeppelin_pr.py
index 9b21b0e120c..22602d0dd6d 100755
--- a/dev/test_zeppelin_pr.py
+++ b/dev/test_zeppelin_pr.py
@@ -24,19 +24,25 @@
 # then pr[#PR] branch will be created.
 #
-import sys, os, subprocess
-import json, urllib
+from __future__ import print_function
+import sys, os, subprocess, json, codecs
+
+if sys.version_info[0] == 2:
+    from urllib import urlopen
+else:
+    from urllib.request import urlopen

 if len(sys.argv) == 1:
-    print "usage) " + sys.argv[0] + " [#PR]"
-    print "  eg) " + sys.argv[0] + " 122"
+    print("usage) " + sys.argv[0] + " [#PR]")
+    print("  eg) " + sys.argv[0] + " 122")
     sys.exit(1)

 pr=sys.argv[1]
 githubApi="https://api.github.com/repos/apache/zeppelin"

-prInfo = json.load(urllib.urlopen(githubApi + "/pulls/" + pr))
+reader = codecs.getreader("utf-8")
+prInfo = json.load(reader(urlopen(githubApi + "/pulls/" + pr)))
 if "message" in prInfo and prInfo["message"] == "Not Found":
     sys.stderr.write("PullRequest #" + pr + " not found\n")
     sys.exit(1)
@@ -44,6 +50,7 @@
 prUser=prInfo['user']['login']
 prRepoUrl=prInfo['head']['repo']['clone_url']
 prBranch=prInfo['head']['label'].replace(":", "/")
+print(prBranch)

 # create local branch
 exitCode = os.system("git checkout -b pr" + pr)
@@ -63,21 +70,21 @@
     sys.exit(1)

-currentBranch = subprocess.check_output(["git rev-parse --abbrev-ref HEAD"], shell=True).rstrip()
+currentBranch = subprocess.check_output("git rev-parse --abbrev-ref HEAD", shell=True).rstrip().decode("utf-8")

-print "Merge branch " + prBranch + " into " + currentBranch
+print("Merge branch " + prBranch + " into " + currentBranch)

-rev = subprocess.check_output(["git rev-parse " + prBranch], shell=True).rstrip()
-prAuthor = subprocess.check_output(["git --no-pager show -s --format='%an <%ae>' " + rev], shell=True).rstrip()
-prAuthorDate = subprocess.check_output(["git --no-pager show -s --format='%ad' " + rev], shell=True).rstrip()
+rev = subprocess.check_output("git rev-parse " + prBranch, shell=True).rstrip().decode("utf-8")
+prAuthor = subprocess.check_output("git --no-pager show -s --format=\"%an <%ae>\" " + rev, shell=True).rstrip().decode("utf-8")
+prAuthorDate = subprocess.check_output("git --no-pager show -s --format=\"%ad\" " + rev, shell=True).rstrip().decode("utf-8")
 prTitle = prInfo['title']
 prBody = prInfo['body']

-commitList = subprocess.check_output(["git log --pretty=format:'%h' " + currentBranch + ".." + prBranch], shell=True).rstrip()
+commitList = subprocess.check_output("git log --pretty=format:\"%h\" " + currentBranch + ".." + prBranch, shell=True).rstrip().decode("utf-8")

 authorList = []
 for commitHash in commitList.split("\n"):
-    a = subprocess.check_output(["git show -s --pretty=format:'%an <%ae>' "+commitHash], shell=True).rstrip()
+    a = subprocess.check_output("git show -s --pretty=format:\"%an <%ae>\" "+commitHash, shell=True).rstrip().decode("utf-8")
     if a not in authorList:
         authorList.append(a)

@@ -85,20 +92,20 @@
 if prBody :
     commitMsg += prBody + "\n\n"
 for author in authorList:
-    commitMsg += "Author: " + author+"\n"
+    commitMsg += "Author: " + author +"\n"
 commitMsg += "\n"
 commitMsg += "Closes #" + pr + " from " + prBranch + " and squashes the following commits:\n\n"
-commitMsg += subprocess.check_output(["git log --pretty=format:'%h [%an] %s' " + currentBranch + ".." + prBranch], shell=True).rstrip()
+commitMsg += subprocess.check_output("git log --pretty=format:\"%h [%an] %s\" " + currentBranch + ".." + prBranch, shell=True).rstrip().decode("utf-8")

 exitCode = os.system("git merge --no-commit --squash " + prBranch)
 if exitCode != 0:
     sys.stderr.write("Can not merge\n")
     sys.exit(1)

-exitCode = os.system('git commit -a --author "' + prAuthor + '" --date "' + prAuthorDate + '" -m"' + commitMsg.encode('utf-8') + '"')
+exitCode = os.system('git commit -a --author "' + prAuthor + '" --date "' + prAuthorDate + '" -m"' + commitMsg + '"')
 if exitCode != 0:
     sys.stderr.write("Commit failed\n")
     sys.exit(1)

 os.system("git remote remove " + prUser)

-print "Branch " + prBranch + " is merged into " + currentBranch
+print("Branch " + prBranch + " is merged into " + currentBranch)
diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock
index 170579e0edf..aaccff9451e 100644
--- a/docs/Gemfile.lock
+++ b/docs/Gemfile.lock
@@ -27,7 +27,7 @@ GEM
       maruku (~> 0.6.0)
       pygments.rb (~> 0.5.0)
       redcarpet (~> 2.3.0)
-      safe_yaml (~> 0.9.7)
+      safe_yaml (~> 1.0.4)
     kramdown (1.2.0)
     liquid (2.5.4)
     listen (1.3.1)
@@ -47,7 +47,7 @@ GEM
       ffi (>= 0.5.0)
     rdiscount (2.1.7)
     redcarpet (2.3.0)
-    safe_yaml (0.9.7)
+    safe_yaml (1.0.4)
     syntax (1.0.0)
     yajl-ruby (1.1.0)
diff --git a/docs/README.md b/docs/README.md
index 6a4c0de62b4..4dc810edf18 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,39 +1,55 @@
-## Zeppelin documentation
-
-This readme will walk you through building the Zeppelin documentation, which is included here with the Zeppelin source code.
+# Apache Zeppelin documentation
+This README will walk you through building the documentation of Apache Zeppelin. The documentation is included here with the Apache Zeppelin source code. The online documentation at [https://zeppelin.apache.org/docs/](https://zeppelin.apache.org/docs/latest/) is also generated from the files found here.

 ## Build documentation
-See https://help.github.com/articles/using-jekyll-with-pages#installing-jekyll
+Zeppelin uses [Jekyll](https://jekyllrb.com/), a static site generator, and [GitHub Pages](https://pages.github.com/) as a site publisher. For more details, see [help.github.com/articles/about-github-pages-and-jekyll/](https://help.github.com/articles/about-github-pages-and-jekyll/).
+
+**Requirements**
+
+```
+# ruby --version >= 2.0.0
+# Install Bundler using gem
+gem install bundler
+
+cd $ZEPPELIN_HOME/docs
+# Install all dependencies declared in the Gemfile
+bundle install
+```
+
+For further information about requirements, please see [here](https://help.github.com/articles/setting-up-your-github-pages-site-locally-with-jekyll/#requirements).
-**tl;dr version:**
+On OS X 10.9, you may need to do
 ```
-    ruby --version >= 1.9.3
-    gem install bundler
-    # go to /docs under your Zeppelin source
-    bundle install
+xcode-select --install
 ```
-*On OS X 10.9 you may need to do "xcode-select --install"*

-## Run website
-    bundle exec jekyll serve --watch
+## Run website locally
+If you don't want to end up with badly rendered pages, run the documentation site locally first.
+In `$ZEPPELIN_HOME/docs`,
+
+```
+bundle exec jekyll serve --watch
+```
+
+Using the above command, Jekyll will start a web server at `http://localhost:4000` and watch the `/docs` directory for updates.

-## Adding a new page
-    rake page name="new-page.md"
+## Contribute to Zeppelin documentation
+If you wish to help us and contribute to the Zeppelin documentation, please look at the [Zeppelin documentation contribution guidelines](https://zeppelin.apache.org/contribution/contributions.html).

-## Bumping up version in a new release
+## For committers only
+### Bumping up version in a new release

 * `ZEPPELIN_VERSION` and `BASE_PATH` property in _config.yml

-## Deploy to ASF svnpubsub infra (for committers only)
+### Deploy to ASF svnpubsub infra

 1. generate static website in `./_site`
+
 ```
 # go to /docs under Zeppelin source
 bundle exec jekyll build --safe
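Putting the README's steps together, one possible local preview loop looks like this (paths assume a checkout at `$ZEPPELIN_HOME`):

```
cd $ZEPPELIN_HOME/docs
bundle install                    # once, to fetch Jekyll and its dependencies
bundle exec jekyll serve --watch  # live preview at http://localhost:4000
bundle exec jekyll build --safe   # generate ./_site for publishing
```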
diff --git a/docs/_config.yml b/docs/_config.yml
index ea58475cfeb..69d0d836982 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -21,7 +21,7 @@ author :
   twitter : ASF
   feedburner : feedname

-ZEPPELIN_VERSION : 0.6.1
+ZEPPELIN_VERSION : 0.8.0-SNAPSHOT

 # The production_url is only used when full-domain names are needed
 # such as sitemap.txt
@@ -59,7 +59,7 @@ JB :
   #   - Only the following values are falsy: ["", null, false]
   #   - When setting BASE_PATH it must be a valid url.
   #     This means always setting the protocol (http|https) or prefixing with "/"
-  BASE_PATH : /docs/0.6.1
+  BASE_PATH : /docs/0.8.0-SNAPSHOT

   # By default, the asset_path is automatically defined relative to BASE_PATH plus the enabled theme.
   # ex: [BASE_PATH]/assets/themes/[THEME-NAME]
diff --git a/docs/_includes/themes/zeppelin/_navigation.html b/docs/_includes/themes/zeppelin/_navigation.html
index c593e1bd752..4e49a1acb3a 100644
--- a/docs/_includes/themes/zeppelin/_navigation.html
+++ b/docs/_includes/themes/zeppelin/_navigation.html
@@ -7,11 +7,15 @@
[the HTML markup of this hunk was garbled during extraction; the surviving fragments, "I'm zeppelin", "Zeppelin" and "{{site.ZEPPELIN_VERSION}}", indicate it rewrites the navbar brand link that shows the Zeppelin name and version]
diff --git a/docs/_includes/themes/zeppelin/default.html b/docs/_includes/themes/zeppelin/default.html
index cd07602ec90..8d84124bb1e 100644
--- a/docs/_includes/themes/zeppelin/default.html
+++ b/docs/_includes/themes/zeppelin/default.html
@@ -2,7 +2,7 @@
-    <title>{{ page.title }}</title>
+    <title>Apache Zeppelin {{ site.ZEPPELIN_VERSION }} Documentation: {{ page.title }}</title>
     {% if page.description %}{% endif %}
@@ -34,6 +34,8 @@
+[two added lines, presumably the stylesheet/script includes for the new documentation search, were garbled during extraction]
diff --git a/docs/assets/themes/zeppelin/css/style.css b/docs/assets/themes/zeppelin/css/style.css
index 1a6f3da976a..2847a51edc2 100644
--- a/docs/assets/themes/zeppelin/css/style.css
+++ b/docs/assets/themes/zeppelin/css/style.css
@@ -79,7 +79,7 @@ body {
   padding-bottom: 10px;
 }

-.navbar-brand img {
+.navbar-brand-main img {
   margin: 0;
 }

@@ -124,7 +124,7 @@ body {
   background: #265380;
 }

-.navbar-inverse .navbar-brand {
+.navbar-inverse .navbar-brand-main {
   color: white;
   text-decoration: none;
   font-size: 32px;
@@ -438,11 +438,10 @@ a.anchor {
 .content table {
   display: block;
   width: 100%;
-  overflow: auto;
   word-break: normal;
   word-break: keep-all;
   -webkit-overflow-scrolling: touch;
-  font-size: 90%;
+  font-size: 87%;
   margin-top: 16px;
   margin-bottom: 16px;
 }
@@ -453,10 +452,19 @@
 }

 .content table th, .content table td {
-  padding: 0.5rem 1rem;
+  padding: 0.7rem 1rem;
   border: 1px solid #e9ebec;
 }

+.properties {
+  font-size: 12.5px !important;
+  font-weight: normal;
+  color: #4c555a !important;
+  margin-bottom: 0px;
+}
+
+
+
 /* posts index */
 .post > h3.title {
   position: relative;
@@ -524,17 +532,10 @@
   color: white;
 }

-.navbar-brand {
+.navbar-brand-main {
   font-family: 'Patua One', cursive;
 }

-.navbar-brand small {
-  font-size: 14px;
-  font-family: 'Open Sans', 'Helvetica Neue', Helvetica;
-  color: white;
-  vertical-align: bottom;
-}
-
 .navbar-collapse.collapse {
   max-height: 50px;
 }
@@ -569,19 +570,43 @@ a.anchorjs-link:hover { text-decoration: none; }
   margin-left: -18px;
 }

+/* Search Page */
+#search p {
+  font-size: 30px;
+  font-weight: bold;
+  color: black;
+}
+
+#search_results p {
+  font-size: 13px;
+  font-weight: 400;
+}
+
+#search_results a {
+  vertical-align: super;
+  font-size: 16px;
+  text-decoration: underline;
+}
+
+#search_results .link {
+  font-size: 13px;
+  color: #008000;
+  padding-bottom: 3px;
+}
+
 /* Custom, iPhone Retina */
 @media only screen and (max-width: 480px) {
   .jumbotron h1 {
     display: none;
   }

-  .navbar-brand small {
+  .navbar-brand-version small {
     display: none;
     color: white;
   }
 }

@media only screen and (max-width: 768px) {
-  .navbar .navbar-brand {
+  .navbar .navbar-brand-main {
     padding-bottom: 0;
   }
 }
@@ -589,13 +614,17 @@
 @media only screen
 and (min-width: 768px)
 and (max-width: 1024px) {
-  .navbar-brand small {
+  .navbar-brand-version small {
     display: none;
   }

   .navbar-collapse.collapse {
     padding-right: 0;
   }
+
+  .navbar-fixed-top > .container {
+    width: 800px;
+  }
 }

 /* master branch docs dropdown menu */
@@ -619,8 +648,21 @@ and (max-width: 1024px) {
   border-bottom-color: #428bca;
 }

-#menu .navbar-brand {
+#menu .navbar-brand-version {
   margin-right: 50px;
+  text-decoration: none !important;
+}
+
+#menu
.navbar-brand-version span { + float: none; + display: inline-block; + vertical-align: bottom; +} + +#menu .navbar-brand-version small { + font-size: 14px; + font-family: 'Open Sans', 'Helvetica Neue', Helvetica; + color: white; } /* gh-pages branch docs dropdown menu */ diff --git a/docs/assets/themes/zeppelin/img/docs-img/add_credential.png b/docs/assets/themes/zeppelin/img/docs-img/add_credential.png new file mode 100644 index 00000000000..102b3ec7d79 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/add_credential.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/cdh_yarn_applications.png b/docs/assets/themes/zeppelin/img/docs-img/cdh_yarn_applications.png new file mode 100644 index 00000000000..980ea5b01fd Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/cdh_yarn_applications.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/click_create_button.png b/docs/assets/themes/zeppelin/img/docs-img/click_create_button.png new file mode 100644 index 00000000000..d6f3c15b910 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/click_create_button.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/click_interpreter_binding_button.png b/docs/assets/themes/zeppelin/img/docs-img/click_interpreter_binding_button.png new file mode 100644 index 00000000000..1c1a36ac51e Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/click_interpreter_binding_button.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/credential_tab.png b/docs/assets/themes/zeppelin/img/docs-img/credential_tab.png new file mode 100644 index 00000000000..66a1fbed56e Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/credential_tab.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/edit_dependencies.png b/docs/assets/themes/zeppelin/img/docs-img/edit_dependencies.png new file mode 100644 index 00000000000..30f22db25d1 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/edit_dependencies.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/edit_properties.png b/docs/assets/themes/zeppelin/img/docs-img/edit_properties.png new file mode 100644 index 00000000000..e67d49bcff4 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/edit_properties.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/elasticsearch-config.png b/docs/assets/themes/zeppelin/img/docs-img/elasticsearch-config.png index b5f7ddab3c0..54a634a717d 100644 Binary files a/docs/assets/themes/zeppelin/img/docs-img/elasticsearch-config.png and b/docs/assets/themes/zeppelin/img/docs-img/elasticsearch-config.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/hive_setting.png b/docs/assets/themes/zeppelin/img/docs-img/hive_setting.png new file mode 100644 index 00000000000..31a9821360e Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/hive_setting.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/homepage_custom_notebook_list.png b/docs/assets/themes/zeppelin/img/docs-img/homepage_custom_notebook_list.png new file mode 100644 index 00000000000..13bcadcaa55 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/homepage_custom_notebook_list.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/interpreter-add-repo2.png b/docs/assets/themes/zeppelin/img/docs-img/interpreter-add-repo2.png index bc7c2ebc2b6..62708c60001 100644 Binary files 
a/docs/assets/themes/zeppelin/img/docs-img/interpreter-add-repo2.png and b/docs/assets/themes/zeppelin/img/docs-img/interpreter-add-repo2.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/jdbc-multi-connection-setting.png b/docs/assets/themes/zeppelin/img/docs-img/jdbc-multi-connection-setting.png deleted file mode 100644 index 4b4d7b50ff2..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/jdbc-multi-connection-setting.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/jdbc-simple-connection-setting.png b/docs/assets/themes/zeppelin/img/docs-img/jdbc-simple-connection-setting.png deleted file mode 100644 index 6134b39a7a6..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/jdbc-simple-connection-setting.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/jdbc_interpreter_binding.png b/docs/assets/themes/zeppelin/img/docs-img/jdbc_interpreter_binding.png new file mode 100644 index 00000000000..86a7ce418b2 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/jdbc_interpreter_binding.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/mariadb_setting.png b/docs/assets/themes/zeppelin/img/docs-img/mariadb_setting.png new file mode 100644 index 00000000000..da32528ece4 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/mariadb_setting.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/markdown-example-pegdown-parser-plugins.png b/docs/assets/themes/zeppelin/img/docs-img/markdown-example-pegdown-parser-plugins.png new file mode 100644 index 00000000000..e3455e31ce3 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/markdown-example-pegdown-parser-plugins.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/markdown-example-pegdown-parser.png b/docs/assets/themes/zeppelin/img/docs-img/markdown-example-pegdown-parser.png new file mode 100644 index 00000000000..21e8bc5bb7c Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/markdown-example-pegdown-parser.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/markdown-interpreter-setting.png b/docs/assets/themes/zeppelin/img/docs-img/markdown-interpreter-setting.png index 7b294e1eb11..1d427797f60 100644 Binary files a/docs/assets/themes/zeppelin/img/docs-img/markdown-interpreter-setting.png and b/docs/assets/themes/zeppelin/img/docs-img/markdown-interpreter-setting.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/matplotlibAngularExample.gif b/docs/assets/themes/zeppelin/img/docs-img/matplotlibAngularExample.gif new file mode 100644 index 00000000000..4696bcf52bc Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/matplotlibAngularExample.gif differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/mesos_frameworks.png b/docs/assets/themes/zeppelin/img/docs-img/mesos_frameworks.png new file mode 100644 index 00000000000..af428930fd6 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/mesos_frameworks.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/mysql_setting.png b/docs/assets/themes/zeppelin/img/docs-img/mysql_setting.png new file mode 100644 index 00000000000..f4e4a65b3ed Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/mysql_setting.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/phoenix_thick_setting.png b/docs/assets/themes/zeppelin/img/docs-img/phoenix_thick_setting.png new file mode 100644 index 
00000000000..57f524e445d Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/phoenix_thick_setting.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/phoenix_thin_setting.png b/docs/assets/themes/zeppelin/img/docs-img/phoenix_thin_setting.png new file mode 100644 index 00000000000..8f93ab6ac86 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/phoenix_thin_setting.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/postgres_setting.png b/docs/assets/themes/zeppelin/img/docs-img/postgres_setting.png new file mode 100644 index 00000000000..7eb7a9b0003 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/postgres_setting.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/redshift_setting.png b/docs/assets/themes/zeppelin/img/docs-img/redshift_setting.png new file mode 100644 index 00000000000..54a5f710139 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/redshift_setting.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/run_paragraph_with_jdbc.png b/docs/assets/themes/zeppelin/img/docs-img/run_paragraph_with_jdbc.png new file mode 100644 index 00000000000..41638da6f83 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/run_paragraph_with_jdbc.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/select_name_and_group.png b/docs/assets/themes/zeppelin/img/docs-img/select_name_and_group.png new file mode 100644 index 00000000000..9c963b8ae9d Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/select_name_and_group.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/shell-example.png b/docs/assets/themes/zeppelin/img/docs-img/shell-example.png new file mode 100644 index 00000000000..2d7fa9a878d Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/shell-example.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/spark_ui.png b/docs/assets/themes/zeppelin/img/docs-img/spark_ui.png new file mode 100644 index 00000000000..ca91cf02432 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/spark_ui.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/standalone_conf.png b/docs/assets/themes/zeppelin/img/docs-img/standalone_conf.png new file mode 100644 index 00000000000..908fc84fbf7 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/standalone_conf.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/tajo_setting.png b/docs/assets/themes/zeppelin/img/docs-img/tajo_setting.png new file mode 100644 index 00000000000..1e56d648f1a Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/tajo_setting.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/tested_databases.png b/docs/assets/themes/zeppelin/img/docs-img/tested_databases.png new file mode 100644 index 00000000000..fb6ace22d85 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/tested_databases.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/writing_spell_registered.png b/docs/assets/themes/zeppelin/img/docs-img/writing_spell_registered.png new file mode 100644 index 00000000000..e8bcd473088 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/writing_spell_registered.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/writing_spell_using.gif b/docs/assets/themes/zeppelin/img/docs-img/writing_spell_using.gif new file mode 100644 index 00000000000..ac73cefc13c Binary files /dev/null and 
b/docs/assets/themes/zeppelin/img/docs-img/writing_spell_using.gif differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/writing_visualization_example.png b/docs/assets/themes/zeppelin/img/docs-img/writing_visualization_example.png new file mode 100644 index 00000000000..219d7c8b4e5 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/writing_visualization_example.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/writing_visualization_helium_menu.png b/docs/assets/themes/zeppelin/img/docs-img/writing_visualization_helium_menu.png new file mode 100644 index 00000000000..7e1ce20831b Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/writing_visualization_helium_menu.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/yarn_applications.png b/docs/assets/themes/zeppelin/img/docs-img/yarn_applications.png new file mode 100644 index 00000000000..06c5296ad63 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/yarn_applications.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/zeppelin_mesos_conf.png b/docs/assets/themes/zeppelin/img/docs-img/zeppelin_mesos_conf.png new file mode 100644 index 00000000000..b85a3da4744 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/zeppelin_mesos_conf.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/zeppelin_with_cdh.png b/docs/assets/themes/zeppelin/img/docs-img/zeppelin_with_cdh.png new file mode 100644 index 00000000000..9dae220cb28 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/zeppelin_with_cdh.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/zeppelin_yarn_conf.png b/docs/assets/themes/zeppelin/img/docs-img/zeppelin_yarn_conf.png new file mode 100644 index 00000000000..435193ac15e Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/zeppelin_yarn_conf.png differ diff --git a/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png b/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png new file mode 100644 index 00000000000..b90b982e1d8 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png differ diff --git a/docs/assets/themes/zeppelin/img/screenshots/display_formula.png b/docs/assets/themes/zeppelin/img/screenshots/display_formula.png new file mode 100644 index 00000000000..ed494f0b33d Binary files /dev/null and b/docs/assets/themes/zeppelin/img/screenshots/display_formula.png differ diff --git a/docs/assets/themes/zeppelin/img/screenshots/flink-webui.png b/docs/assets/themes/zeppelin/img/screenshots/flink-webui.png new file mode 100644 index 00000000000..3fec8fedbcb Binary files /dev/null and b/docs/assets/themes/zeppelin/img/screenshots/flink-webui.png differ diff --git a/docs/assets/themes/zeppelin/img/screenshots/homepage_notebook_list.png b/docs/assets/themes/zeppelin/img/screenshots/homepage_notebook_list.png deleted file mode 100644 index a5ac6f2c9e5..00000000000 Binary files a/docs/assets/themes/zeppelin/img/screenshots/homepage_notebook_list.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/screenshots/interpreter_precode.png b/docs/assets/themes/zeppelin/img/screenshots/interpreter_precode.png new file mode 100644 index 00000000000..61b79c3da07 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/screenshots/interpreter_precode.png differ diff --git a/docs/assets/themes/zeppelin/img/screenshots/interpreter_setting_with_context_parameters.png 
b/docs/assets/themes/zeppelin/img/screenshots/interpreter_setting_with_context_parameters.png
new file mode 100644
index 00000000000..17c83b6c6e9
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/screenshots/interpreter_setting_with_context_parameters.png differ
diff --git a/docs/assets/themes/zeppelin/img/screenshots/selectForm-checkbox.png b/docs/assets/themes/zeppelin/img/screenshots/selectForm-checkbox.png
new file mode 100644
index 00000000000..47f9fa9ea07
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/screenshots/selectForm-checkbox.png differ
diff --git a/docs/assets/themes/zeppelin/img/screenshots/spark-master-webui1.png b/docs/assets/themes/zeppelin/img/screenshots/spark-master-webui1.png
new file mode 100644
index 00000000000..5a4f028a01c
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/screenshots/spark-master-webui1.png differ
diff --git a/docs/assets/themes/zeppelin/img/screenshots/user-impersonation.gif b/docs/assets/themes/zeppelin/img/screenshots/user-impersonation.gif
new file mode 100644
index 00000000000..eb39bc8dac8
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/screenshots/user-impersonation.gif differ
diff --git a/docs/assets/themes/zeppelin/img/spark_logo.jpg b/docs/assets/themes/zeppelin/img/spark_logo.jpg
deleted file mode 100644
index a13c87019c1..00000000000
Binary files a/docs/assets/themes/zeppelin/img/spark_logo.jpg and /dev/null differ
diff --git a/docs/assets/themes/zeppelin/img/spark_logo.png b/docs/assets/themes/zeppelin/img/spark_logo.png
new file mode 100644
index 00000000000..afe2d1baeb8
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/spark_logo.png differ
diff --git a/docs/assets/themes/zeppelin/img/ui-img/configuration_menu.png b/docs/assets/themes/zeppelin/img/ui-img/configuration_menu.png
index 267de0318f9..8b62cf261a4 100644
Binary files a/docs/assets/themes/zeppelin/img/ui-img/configuration_menu.png and b/docs/assets/themes/zeppelin/img/ui-img/configuration_menu.png differ
diff --git a/docs/assets/themes/zeppelin/img/ui-img/interpreter_menu.png b/docs/assets/themes/zeppelin/img/ui-img/interpreter_menu.png
index 31fbcbc81ea..1724beac314 100644
Binary files a/docs/assets/themes/zeppelin/img/ui-img/interpreter_menu.png and b/docs/assets/themes/zeppelin/img/ui-img/interpreter_menu.png differ
diff --git a/docs/assets/themes/zeppelin/js/lunr.min.js b/docs/assets/themes/zeppelin/js/lunr.min.js
new file mode 100644
index 00000000000..b0198dff91f
--- /dev/null
+++ b/docs/assets/themes/zeppelin/js/lunr.min.js
@@ -0,0 +1,7 @@
+/**
+ * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 0.7.0
+ * Copyright (C) 2016 Oliver Nightingale
+ * MIT Licensed
+ * @license
+ */
+[the single minified line that makes up the body of the bundled lunr 0.7.0 library was garbled during extraction (angle-bracketed comparisons were stripped) and is omitted here]

[a new client-side search script is also added in this region of the diff; its header and body were likewise garbled. From the surviving fragments it builds a lunr index over the documentation pages and renders either "Found N result(s)" with each page's title, excerpt and link, or "Your search did not match any documents. Make sure that all words are spelled correctly or try more general keywords."]
'); + } }); + } +});
diff --git a/docs/atom.xml b/docs/atom.xml
index 73acc07dee2..7ec29339dd6 100644
--- a/docs/atom.xml
+++ b/docs/atom.xml
@@ -1,6 +1,6 @@
---
layout: nil
-title : Atom Feed
+title :
---
diff --git a/docs/development/howtocontribute.md b/docs/development/howtocontribute.md
index 2d3842a2257..c2948ca15cf 100644
--- a/docs/development/howtocontribute.md
+++ b/docs/development/howtocontribute.md
@@ -1,9 +1,23 @@
---
layout: page
-title: "How to contribute"
-description: "How to contribute"
+title: "Contributing to Apache Zeppelin (Code)"
+description: "How can you contribute to the Apache Zeppelin project? This document covers everything from setting up your development environment to making a pull request on Github."
group: development
---
+
+{% include JB/setup %}

# Contributing to Apache Zeppelin ( Code )

@@ -50,6 +64,9 @@ git clone -b branch-0.5.6 git://git.apache.org/zeppelin.git zeppelin

Apache Zeppelin follows [Fork & Pull](https://github.com/sevntu-checkstyle/sevntu.checkstyle/wiki/Development-workflow-with-Git:-Fork,-Branching,-Commits,-and-Pull-Request) as a source control workflow. If you want to not only build Zeppelin but also make any changes, then you need to fork the [Zeppelin github mirror repository](https://github.com/apache/zeppelin) and make a pull request.

+Before making a pull request, please take a look at the [Contribution Guidelines](http://zeppelin.apache.org/contribution/contributions.html).
+
+
### Build

```
@@ -91,12 +108,30 @@ Server will be run on [http://localhost:8080](http://localhost:8080).

Some portions of the Zeppelin code are generated by [Thrift](http://thrift.apache.org). For most Zeppelin changes, you don't need to worry about this. But if you modify any of the Thrift IDL files (e.g. zeppelin-interpreter/src/main/thrift/*.thrift), then you also need to regenerate these files and submit their updated version as part of your patch.

-To regenerate the code, install **thrift-0.9.2** and change directory into Zeppelin source directory. and then run following command
+To regenerate the code, install **thrift-0.9.2** and then run the following command to generate the thrift code.

```
-thrift -out zeppelin-interpreter/src/main/java/ --gen java zeppelin-interpreter/src/main/thrift/RemoteInterpreterService.thrift
+cd /zeppelin-interpreter/src/main/thrift
+./genthrift.sh
+```
+
+### Run Selenium test
+
+Zeppelin has a [set of integration tests](https://github.com/apache/zeppelin/tree/master/zeppelin-server/src/test/java/org/apache/zeppelin/integration) using Selenium. To run these tests, first build and run Zeppelin and make sure Zeppelin is running on port 8080. Then you can run a test using the following command
+
```
+TEST_SELENIUM=true mvn test -Dtest=[TEST_NAME] -DfailIfNoTests=false -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server'
+```
+
+For example, to run [ParagraphActionsIT](https://github.com/apache/zeppelin/blob/master/zeppelin-server/src/test/java/org/apache/zeppelin/integration/ParagraphActionsIT.java),
+
+```
+TEST_SELENIUM=true mvn test -Dtest=ParagraphActionsIT -DfailIfNoTests=false -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server'
+```
+
+You'll need the Firefox web browser installed in your development environment. While the CI server uses [Firefox 31.0](https://ftp.mozilla.org/pub/firefox/releases/31.0/) to run the selenium tests, it is a good idea to install the same version (disable auto update to keep the version).
+
## Where to Start

You can find issues for beginners & newbies

diff --git a/docs/development/howtocontributewebsite.md b/docs/development/howtocontributewebsite.md
index 0db15551585..0c2bafc5b64 100644
--- a/docs/development/howtocontributewebsite.md
+++ b/docs/development/howtocontributewebsite.md
@@ -1,9 +1,23 @@
---
layout: page
-title: "How to contribute (website)"
-description: "How to contribute (website)"
+title: "Contributing to Apache Zeppelin (Website)"
+description: "How can you contribute to the Apache Zeppelin project website? This document covers everything from building the Zeppelin documentation site to making a pull request on Github."
group: development
---
+
+{% include JB/setup %}

# Contributing to Apache Zeppelin ( Website )

@@ -48,7 +62,7 @@ When you are ready, just make a pull-request.

## Alternative way

-You can directly edit `.md` files in `/docs/` directory at the web interface of github and make pull-request immediatly.
+You can directly edit the `.md` files in the `/docs/` directory using the github web interface and make a pull request immediately.

## Stay involved
Contributors should join the Zeppelin mailing lists.

diff --git a/docs/development/writingzeppelinapplication.md b/docs/development/writingzeppelinapplication.md
new file mode 100644
index 00000000000..265efcf7120
--- /dev/null
+++ b/docs/development/writingzeppelinapplication.md
@@ -0,0 +1,185 @@
+---
+layout: page
+title: "Writing a new Application"
+description: "An Apache Zeppelin Application is a package that runs on the Interpreter process and displays its output inside the notebook. Making your own Application in Apache Zeppelin is quite easy."
+group: development
+---
+
+{% include JB/setup %}
+
+# Writing a new Application
+
+
+## What is Apache Zeppelin Application
+
+An Apache Zeppelin Application is a package that runs on the Interpreter process and displays its output inside the notebook. While an application runs on the Interpreter process, it can access resources provided by the Interpreter through the ResourcePool. Output is always rendered by the AngularDisplaySystem. Therefore, an application provides all the possibilities of making an interactive graphical application that uses the data and processing power of any Interpreter.
+
+
+## Make your own Application
+
+Writing an Application means extending `org.apache.zeppelin.helium.Application`. You can use your favorite IDE and language, as long as the resulting Java class files are packaged into a jar. The `Application` class looks like
+
+```java
+
+/**
+ * Constructor. Invoked when application is loaded
+ */
+public Application(ApplicationContext context);
+
+/**
+ * Invoked when there are (possible) updates in the required resource set.
+ * i.e. invoked after application load and after paragraph finishes.
+ */
+public abstract void run(ResourceSet args);
+
+/**
+ * Invoked before application unload.
+ * Application is automatically unloaded with paragraph/notebook removal
+ */
+public abstract void unload();
+```
+
+
+You can check the example applications under the [./zeppelin-examples](https://github.com/apache/incubator-zeppelin/tree/master/zeppelin-examples) directory.
+
+
+## Development mode
+
+In development mode, you can run your Application from your IDE as a normal Java application and see the result inside a Zeppelin notebook.
+
+`org.apache.zeppelin.helium.ZeppelinApplicationDevServer` can run a Zeppelin Application in development mode.
+
+```java
+
+// entry point for development mode
+public static void main(String[] args) throws Exception {
+
+  // add resources for development mode
+  LocalResourcePool pool = new LocalResourcePool("dev");
+  pool.put("date", new Date());
+
+  // run application in development mode with the given resources
+  // in this case, Clock.class.getName() will be the application class name
+  org.apache.zeppelin.helium.ZeppelinApplicationDevServer devServer = new org.apache.zeppelin.helium.ZeppelinApplicationDevServer(
+    Clock.class.getName(), pool.getAll());
+
+  // start development mode
+  devServer.start();
+  devServer.join();
+}
+```
+
+
+In a Zeppelin notebook, running `%dev run` will connect to the application running in development mode.
+
+
+## Package file
+
+The package file is a JSON file that provides information about the application.
+It contains the following information
+
+```json
+{
+  "name" : "[organization].[name]",
+  "description" : "Description",
+  "artifact" : "groupId:artifactId:version",
+  "className" : "your.package.name.YourApplicationClass",
+  "resources" : [
+    ["resource.name", ":resource.class.name"],
+    ["alternative.resource.name", ":alternative.class.name"]
+  ],
+  "icon" : ""
+}
+```
+
+#### name
+
+Name is a string in `[group].[name]` format.
+`[group]` and `[name]` allow only `[A-Za-z0-9_]`.
+Group is normally the name of the organization that creates this application.
+
+#### description
+
+A short description about the application
+
+#### artifact
+
+Location of the jar artifact.
+`"groupId:artifactId:version"` will load the artifact from a Maven repository.
+If the jar exists in the local filesystem, an absolute or relative path can be used.
+
+e.g.
+
+When the artifact exists in a Maven repository
+
+```
+artifact: "org.apache.zeppelin:zeppelin-examples:0.6.0"
+```
+
+When the artifact exists in the local filesystem
+
+```
+artifact: "zeppelin-example/target/zeppelin-example-0.6.0.jar"
+```
+
+#### className
+
+Entry point. A class that extends `org.apache.zeppelin.helium.Application`
+
+#### resources
+
+A two-dimensional array that defines the required resources by name or by className. The Helium Application launcher will compare the resources in the ResourcePool with the information in this field and suggest the application only when all required resources are available in the ResourcePool.
+
+A resource name is a string which will be compared with the name of objects in the ResourcePool. A className is a string with ":" prepended, which will be compared with the className of the objects in the ResourcePool.
+
+An application may require two or more resources. Required resources can be listed inside the json array. For example, if the application requires objects "name1" and "name2" and an object of type "className1" to run, the resources field can be
+
+```
+resources: [
+  [ "name1", "name2", ":className1", ...]
+]
+```
+
+If the Application can handle alternative combinations of required resources, the alternative sets can be listed as below.
+
+```
+resources: [
+  [ "name", ":className"],
+  [ "altName", ":altClassName1"],
+  ...
+]
+```
+
+An easier way to understand this scheme is
+
+```
+resources: [
+  [ 'resource' AND 'resource' AND ... ] OR
+  [ 'resource' AND 'resource' AND ... ] OR
+  ...
+]
+```
+
+
+#### icon
+
+Icon to be used on the application button. The string in this field will be rendered as an HTML tag.
+
+e.g.
+
+```
+icon: ""
+```
diff --git a/docs/development/writingzeppelininterpreter.md b/docs/development/writingzeppelininterpreter.md
index 8061a89c36d..c5fb23ccad0 100644
--- a/docs/development/writingzeppelininterpreter.md
+++ b/docs/development/writingzeppelininterpreter.md
@@ -1,7 +1,7 @@
---
layout: page
-title: "Writing Zeppelin Interpreter"
-description: ""
+title: "Writing a New Interpreter"
+description: "Apache Zeppelin Interpreter is a language backend. Every Interpreter belongs to an InterpreterGroup, and Interpreters in the same InterpreterGroup can reference each other."
group: development
---

+{% include JB/setup %}
+
+# Writing a new Spell
+
+
+## What is Apache Zeppelin Spell
+
+A Spell is a kind of interpreter that runs in the browser rather than on the backend; technically, it's a frontend interpreter.
+This provides several benefits.
+
+- A spell is a pluggable frontend interpreter, so it can be installed and removed easily using the helium registry.
+- Every spell is written in javascript, which means you can use whatever existing javascript libraries you want.
+- A spell runs in the browser like the display systems (`%html`, `%table`). In other words, every spell can be used as a display system as well.
+
+## How it works
+
+A Helium Spell works like a [Helium Visualization](./writingzeppelinvisualization.html).
+
+- Every helium package is loaded from the central (online) registry or a local registry
+- You can see the loaded packages in the `/helium` page.
+- When you enable a spell, it's built on the server and sent to the client
+- Finally, it will be loaded into the browser.
+
+## How to use spell
+
+### 1. Enabling
+
+Find the spell you want to use in the `/helium` page and click the `Enable` button.
+
+
+
+### 2. Using
+
+A spell works like an interpreter. Use the `MAGIC` value to execute the spell in a note. (you might need to refresh after enabling)
+For example, use `%echo` for the Echo Spell.
+
+
+
+
+## Write a new Spell
+
+Making a new spell is similar to [Helium Visualization#write-new-visualization](./writingzeppelinvisualization.html#write-new-visualization).
+
+- Add the framework dependency called zeppelin-spell into `package.json`
+- Write code using the framework
+- Publish your spell to [npm](https://www.npmjs.com/)
+
+### 1. Create a npm package
+
+Create a [package.json](https://docs.npmjs.com/files/package.json) in a new directory for the spell.
+
+- You have to add a framework called `zeppelin-spell` as a dependency to create a spell ([zeppelin-spell](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/spell))
+- Also, you can add any dependencies you want to utilise.
+
+Here's an example
+
+```json
+{
+  "name": "zeppelin-echo-spell",
+  "description": "Zeppelin Echo Spell (example)",
+  "version": "1.0.0",
+  "main": "index",
+  "author": "",
+  "license": "Apache-2.0",
+  "dependencies": {
+    "zeppelin-spell": "*"
+  },
+  "helium": {
+    "icon" : "",
+    "spell": {
+      "magic": "%echo",
+      "usage": "%echo "
+    }
+  }
+}
+```
+
+### 2. Write spell using framework
+
+Here are some examples you can refer to
+
+- [Echo Spell](https://github.com/apache/zeppelin/blob/master/zeppelin-examples/zeppelin-example-spell-echo/index.js)
+- [Markdown Spell: Using library](https://github.com/apache/zeppelin/blob/master/zeppelin-examples/zeppelin-example-spell-markdown/index.js)
+- [Flowchart Spell: Using DOM](https://github.com/apache/zeppelin/blob/master/zeppelin-examples/zeppelin-example-spell-flowchart/index.js)
+- [Google Translation API Spell: Using API (returning promise)](https://github.com/apache/zeppelin/blob/master/zeppelin-examples/zeppelin-example-spell-translator/index.js)
+
+Now you need to write the code that creates a spell to process text.
+
+```js
+import {
+  SpellBase,
+  SpellResult,
+  DefaultDisplayType,
+} from 'zeppelin-spell';
+
+export default class EchoSpell extends SpellBase {
+  constructor() {
+    /** pass magic to super class's constructor parameter */
+    super("%echo");
+  }
+
+  interpret(paragraphText) {
+    const processed = paragraphText + '!';
+
+    /**
+     * should return `SpellResult`, which includes `data` and `type`.
+     * the default type is `TEXT` if you don't specify one.
+     */
+    return new SpellResult(processed);
+  }
+}
+```
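+
+A spell's `interpret` doesn't have to return a string synchronously. If it calls an asynchronous API, it can hand a Promise to `SpellResult` instead; the Google Translation API Spell linked above uses this pattern. Below is a minimal, hypothetical sketch of a promise-returning spell: the `https://example.com/upper` endpoint is made up for illustration, and it assumes `SpellResult` accepts a Promise that resolves to the final data, as the translator example suggests.
+
+```js
+import {
+  SpellBase,
+  SpellResult,
+  DefaultDisplayType,
+} from 'zeppelin-spell';
+
+export default class UpperSpell extends SpellBase {
+  constructor() {
+    super("%upper");
+  }
+
+  interpret(paragraphText) {
+    // hypothetical HTTP call; any Promise-returning API works the same way
+    const upperCased = fetch('https://example.com/upper', {
+      method: 'POST',
+      body: paragraphText,
+    }).then(response => response.text());
+
+    // assumption: `SpellResult` can wrap a Promise that resolves to the data
+    return new SpellResult(upperCased, DefaultDisplayType.TEXT);
+  }
+}
+```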
+
+Here is another example: let's say we want to create a markdown spell. First of all, we should add a dependency on markdown in package.json
+
+```json
+// package.json
+  "dependencies": {
+    "markdown": "0.5.0",
+    "zeppelin-spell": "*"
+  },
+```
+
+And here is the spell code.
+
+```js
+import {
+  SpellBase,
+  SpellResult,
+  DefaultDisplayType,
+} from 'zeppelin-spell';
+
+import md from 'markdown';
+
+const markdown = md.markdown;
+
+export default class MarkdownSpell extends SpellBase {
+  constructor() {
+    super("%markdown");
+  }
+
+  interpret(paragraphText) {
+    const parsed = markdown.toHTML(paragraphText);
+
+    /**
+     * specify `DefaultDisplayType.HTML` since `parsed` will contain DOM
+     * otherwise it will be rendered as `DefaultDisplayType.TEXT` (default)
+     */
+    return new SpellResult(parsed, DefaultDisplayType.HTML);
+  }
+}
+```
+
+- You might want to manipulate the DOM directly (e.g. with d3.js); then refer to the [Flowchart Spell](https://github.com/apache/zeppelin/blob/master/zeppelin-examples/zeppelin-example-spell-flowchart/index.js)
+- You might want to return a promise rather than a string (e.g. an API call); then see the sketch above and the [Google Translation API Spell](https://github.com/apache/zeppelin/blob/master/zeppelin-examples/zeppelin-example-spell-translator/index.js)
+
+### 3. Create __Helium package file__ for local deployment
+
+You don't want to publish your package every time you make a change in your spell, so Zeppelin supports local deployment.
+The only thing you need to do is create a __Helium Package file__ (JSON) for the local deployment.
+It's created automatically when you publish to the npm repository, but for the local case you have to create it yourself.
+
+```json
+{
+  "type" : "SPELL",
+  "name" : "zeppelin-echo-spell",
+  "version": "1.0.0",
+  "description" : "Return just what it receives (example)",
+  "artifact" : "./zeppelin-examples/zeppelin-example-spell-echo",
+  "license" : "Apache-2.0",
+  "spell": {
+    "magic": "%echo",
+    "usage": "%echo "
+  }
+}
+```
+
+- Place this file in your local registry directory (default `$ZEPPELIN_HOME/helium`).
+- `type` should be `SPELL`
+- Make sure that `artifact` is the same as your spell directory.
+- You can get information about the other fields in [Helium Visualization#3-create-helium-package-file-and-locally-deploy](./writingzeppelinvisualization.html#3-create-helium-package-file-and-locally-deploy).
+
+### 4. Run in dev mode
+
+```bash
+cd zeppelin-web
+yarn run dev:helium
+```
+
+You can browse localhost:9000. Every time you refresh your browser, Zeppelin will rebuild your spell and reload your changes.
+
+### 5. Publish your spell to the npm repository
+
+See [Publishing npm packages](https://docs.npmjs.com/getting-started/publishing-npm-packages)
diff --git a/docs/development/writingzeppelinvisualization.md b/docs/development/writingzeppelinvisualization.md
new file mode 100644
index 00000000000..18b686ca5e1
--- /dev/null
+++ b/docs/development/writingzeppelinvisualization.md
@@ -0,0 +1,205 @@
+---
+layout: page
+title: "Writing a new Visualization"
+description: "An Apache Zeppelin Visualization is a pluggable package that can be loaded/unloaded at runtime through the Helium framework in Zeppelin. A Visualization is a javascript npm package, and users can use it just like any other built-in visualization in a note."
+group: development
+---
+
+{% include JB/setup %}
+
+# Writing a new Visualization
+
+
+## What is Apache Zeppelin Visualization
+
+An Apache Zeppelin Visualization is a pluggable package that can be loaded/unloaded at runtime through the Helium framework in Zeppelin. A Visualization is a javascript npm package, and users can use it just like any other built-in visualization in a notebook.
+
+
+## How it works
+
+
+#### 1. Load Helium package files from registry
+
+Zeppelin needs to know what Visualization packages are available. Zeppelin reads package information from both the online and local registries.
+Registries are configurable through the `ZEPPELIN_HELIUM_LOCALREGISTRY_DEFAULT` env variable or the `zeppelin.helium.localregistry.default` property.
+
+#### 2. Enable packages
+Once Zeppelin loads the _Helium package files_ from the registries, the available packages are displayed in the Helium menu.
+
+Click the 'enable' button.
+
+
+
+
+#### 3. Create and load visualization bundle on the fly
+
+Once a Visualization package is enabled, [HeliumBundleFactory](https://github.com/apache/zeppelin/blob/master/zeppelin-zengine/src/main/java/org/apache/zeppelin/helium/HeliumBundleFactory.java) creates a js bundle. The js bundle is served by the `helium/bundle/load` REST API endpoint.
+
+#### 4. Run visualization
+
+Zeppelin shows an additional button for each loaded Visualization.
+Users can use them just like any other built-in visualization.
+
+
+
+
+
+## Write new Visualization
+
+#### 1. Create a npm package
+
+Create a [package.json](https://docs.npmjs.com/files/package.json) in your new Visualization directory. You can add any dependencies in package.json, but you **must include two dependencies: [zeppelin-vis](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/visualization) and [zeppelin-tabledata](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/tabledata).**
+
+Here's an example
+
+```json
+{
+  "name": "zeppelin_horizontalbar",
+  "description" : "Horizontal Bar chart",
+  "version": "1.0.0",
+  "main": "horizontalbar",
+  "author": "",
+  "license": "Apache-2.0",
+  "dependencies": {
+    "zeppelin-tabledata": "*",
+    "zeppelin-vis": "*"
+  }
+}
+```
+
+#### 2. Create your own visualization
+
+To create your own visualization, you need to create a js file, import the [Visualization](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/app/visualization/visualization.js) class from the [zeppelin-vis](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/visualization) package, and extend the class. The [zeppelin-tabledata](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/tabledata) package provides some useful transformations, like pivot, that you can use in your visualization (you can create your own transformation, too).
+
+In the [Visualization](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/app/visualization/visualization.js) class, there are several methods that you need to override and implement. Here's a simple visualization that just prints `Hello world`.
+
+```js
+import Visualization from 'zeppelin-vis'
+import PassthroughTransformation from 'zeppelin-tabledata/passthrough'
+
+export default class helloworld extends Visualization {
+  constructor(targetEl, config) {
+    super(targetEl, config)
+    this.passthrough = new PassthroughTransformation(config);
+  }
+
+  render(tableData) {
+    this.targetEl.html('Hello world!')
+  }
+
+  getTransformation() {
+    return this.passthrough
+  }
+}
+```
+
+To learn more about the `Visualization` class, check [visualization.js](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/app/visualization/visualization.js).
+
+You can check a complete visualization package example [here](https://github.com/apache/zeppelin/tree/master/zeppelin-examples/zeppelin-example-horizontalbar).
+
+Zeppelin's built-in visualizations use the same API, so you can check the [built-in visualizations](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/visualization/builtins) as additional examples.
+
+
+#### 3. Create __Helium package file__ and locally deploy
+
+The __Helium Package file__ is a json file that provides information about the visualization.
+It contains the following information
+
+```json
+{
+  "type" : "VISUALIZATION",
+  "name" : "zeppelin_horizontalbar",
+  "description" : "Horizontal Bar chart (example)",
+  "license" : "Apache-2.0",
+  "artifact" : "./zeppelin-examples/zeppelin-example-horizontalbar",
+  "icon" : ""
+}
+```
+
+Place this file in your local registry directory (default `./helium`).
+
+
+##### type
+
+When you're creating a visualization, 'type' should be 'VISUALIZATION'.
+Check the [application](./writingzeppelinapplication.html) type if you're interested in the other types of package.
+
+##### name
+
+Name of the visualization. Should be unique. Allows `[A-Za-z0-9_]`.
+
+
+##### description
+
+A short description about the visualization.
+
+##### artifact
+
+Location of the visualization npm package. Supports an npm package with version or a local filesystem path.
+
+e.g.
+
+When the artifact exists in the npm repository
+
+```json
+"artifact": "my-visualization@1.0.0"
+```
+
+
+When the artifact exists in the local file system
+
+```json
+"artifact": "/path/to/my/visualization"
+```
+
+##### license
+
+License information.
+
+e.g.
+
+```json
+"license": "Apache-2.0"
+```
+
+##### icon
+
+Icon to be used in the visualization select button. The string in this field will be rendered as an HTML tag.
+
+e.g.
+
+```json
+"icon": ""
+```
+
+
+#### 4. Run in dev mode
+
+Place your __Helium package file__ in the local registry (ZEPPELIN_HOME/helium).
+Run Zeppelin, and then run zeppelin-web in visualization dev mode.
+
+```bash
+cd zeppelin-web
+yarn run dev:helium
+```
+
+You can browse localhost:9000. Every time you refresh your browser, Zeppelin will rebuild your visualization and reload your changes.
+
+
+#### 5. Publish your visualization
+
+Once it's done, publish your visualization package using `npm publish`.
+That's it. Within an hour, your visualization will be available in Zeppelin's helium menu.
diff --git a/docs/development/writingzeppelinvisualization_transformation.md b/docs/development/writingzeppelinvisualization_transformation.md
new file mode 100644
index 00000000000..22bf130b124
--- /dev/null
+++ b/docs/development/writingzeppelinvisualization_transformation.md
@@ -0,0 +1,281 @@
+---
+layout: page
+title: "Transformations for Zeppelin Visualization"
+description: "Description for Transformations"
+group: development
+---
+
+{% include JB/setup %}
+
+# Transformations for Zeppelin Visualization
+
+
+## Overview
+
+A Transformation
+
+- **renders** settings which allow users to set columns, and
+- **transforms** table rows according to the configured columns.
+
+Zeppelin provides 4 types of transformations.
+
+## 1. PassthroughTransformation
+
+`PassthroughTransformation` is a simple transformation which does not convert the original tabledata at all.
+
+See [passthrough.js](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/app/tabledata/passthrough.js)
+
+## 2. ColumnselectorTransformation
+
+`ColumnselectorTransformation` is used when you need `N` axes but do not need aggregation.
+
+See [columnselector.js](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/app/tabledata/columnselector.js)
+
+## 3. PivotTransformation
+
+`PivotTransformation` provides group by and aggregation. Every chart using `PivotTransformation` has 3 axes: `Keys`, `Groups` and `Values`.
+
+See [pivot.js](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/app/tabledata/pivot.js)
+
+## 4. AdvancedTransformation
+
+`AdvancedTransformation` has more detailed options while providing the existing features of `PivotTransformation` and `ColumnselectorTransformation`
+
+- multiple sub charts
+- configurable chart axes
+- parameter widgets: `input`, `checkbox`, `option`, `textarea`
+- parsing parameters automatically based on their types
+- expand / fold axis and parameter panels
+- multiple transformation methods while supporting lazy converting
+- re-initialize the whole configuration based on spec hash.
+
+### Spec
+
+`AdvancedTransformation` requires a `spec` which includes the axis and parameter details for charts.
+
+Let's create 2 sub-charts called `line` and `no-group`. Each sub-chart can have different axes and parameters depending on its requirements.
+
+ +```js +class AwesomeVisualization extends Visualization { + constructor(targetEl, config) { + super(targetEl, config) + + const spec = { + charts: { + 'line': { + transform: { method: 'object', }, + sharedAxis: false, /** set if you want to share axes between sub charts, default is `false` */ + axis: { + 'xAxis': { dimension: 'multiple', axisType: 'key', description: 'serial', }, + 'yAxis': { dimension: 'multiple', axisType: 'aggregator', description: 'serial', }, + 'category': { dimension: 'multiple', axisType: 'group', description: 'categorical', }, + }, + parameter: { + 'xAxisUnit': { valueType: 'string', defaultValue: '', description: 'unit of xAxis', }, + 'yAxisUnit': { valueType: 'string', defaultValue: '', description: 'unit of yAxis', }, + 'lineWidth': { valueType: 'int', defaultValue: 0, description: 'width of line', }, + }, + }, + + 'no-group': { + transform: { method: 'object', }, + sharedAxis: false, + axis: { + 'xAxis': { dimension: 'single', axisType: 'key', }, + 'yAxis': { dimension: 'multiple', axisType: 'value', }, + }, + parameter: { + 'xAxisUnit': { valueType: 'string', defaultValue: '', description: 'unit of xAxis', }, + 'yAxisUnit': { valueType: 'string', defaultValue: '', description: 'unit of yAxis', }, + }, + }, + } + + this.transformation = new AdvancedTransformation(config, spec) + } + + ... + + // `render` will be called whenever `axis` or `parameter` is changed + render(data) { + const { chart, parameter, column, transformer, } = data + + if (chart === 'line') { + const transformed = transformer() + // draw line chart + } else if (chart === 'no-group') { + const transformed = transformer() + // draw no-group chart + } + } +} +``` + +
+
+### Spec: `axis`
+
+| Field Name | Available Values (type) | Description |
+| --- | --- | --- |
+|`dimension` | `single` | Axis can contain only 1 column |
+|`dimension` | `multiple` | Axis can contain multiple columns |
+|`axisType` | `key` | Column(s) in this axis will be used as `key` like in `PivotTransformation`. These columns will be served in `column.key` |
+|`axisType` | `aggregator` | Column(s) in this axis will be used as `value` like in `PivotTransformation`. These columns will be served in `column.aggregator` |
+|`axisType` | `group` | Column(s) in this axis will be used as `group` like in `PivotTransformation`. These columns will be served in `column.group` |
+|`axisType` | (string) | Any string value can be used here. These columns will be served in `column.custom` |
+|`maxAxisCount` (optional) | (int) | The max number of columns that this axis can contain. (unlimited if `undefined`) |
+|`minAxisCount` (optional) | (int) | The min number of columns that this axis should contain to draw the chart. (`1` in case of single dimension) |
+|`description` (optional) | (string) | Description for the axis. |
+
+ +Here is an example. + +```js +axis: { + 'xAxis': { dimension: 'multiple', axisType: 'key', }, + 'yAxis': { dimension: 'multiple', axisType: 'aggregator'}, + 'category': { dimension: 'multiple', axisType: 'group', maxAxisCount: 2, valueType: 'string', }, +}, +``` + +
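+
+To make the `axisType` mapping concrete, here is a minimal sketch (not taken from the Zeppelin source) of how `render` might read the columns that a user has dropped on the axes declared above. It assumes the `column.key` / `column.aggregator` / `column.group` shape described in the table, and `drawLineChart` is a hypothetical helper.
+
+```js
+// minimal sketch: reading configured axis columns inside `render`
+render(data) {
+  const { chart, column, transformer, } = data
+
+  // columns dropped on an `axisType: 'key'` axis arrive in `column.key`,
+  // `axisType: 'aggregator'` columns in `column.aggregator`, and so on
+  const keyColumns = column.key
+  const aggregatorColumns = column.aggregator
+  const groupColumns = column.group
+
+  if (chart === 'line') {
+    const transformed = transformer()
+    drawLineChart(transformed, keyColumns, aggregatorColumns, groupColumns)
+  }
+}
+```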
+
+### Spec: `sharedAxis`
+
+If you set `sharedAxis: true` for sub charts, then their axes are persisted in global space (shared). It's useful when you are creating multiple sub charts that share their axes but have different parameters. For example,
+
+- `basic-column`, `stacked-column`, `percent-column`
+- `pie` and `donut`
+
+
+Here is an example.
+
+```js
+  const spec = {
+    charts: {
+      'column': {
+        transform: { method: 'array', },
+        sharedAxis: true,
+        axis: { ... },
+        parameter: { ... },
+      },
+
+      'stacked': {
+        transform: { method: 'array', },
+        sharedAxis: true,
+        axis: { ... },
+        parameter: { ... },
+      },
+
+### Spec: `parameter`
+
+| Field Name | Available Values (type) | Description |
+| --- | --- | --- |
+|`valueType` | `string` | Parameter which has a string value |
+|`valueType` | `int` | Parameter which has an int value |
+|`valueType` | `float` | Parameter which has a float value |
+|`valueType` | `boolean` | Parameter which has a boolean value, usually used with the `checkbox` widget |
+|`valueType` | `JSON` | Parameter which has a JSON value, usually used with the `textarea` widget. `defaultValue` should be `""` (empty string). |
+|`defaultValue` | (any) | Default value of this parameter. The `JSON` type should have `""` (empty string) |
+|`description` | (string) | Description of this parameter. This value will be parsed as HTML for pretty output |
+|`widget` | `input` | Use the [input](https://developer.mozilla.org/en/docs/Web/HTML/Element/input) widget. This is the default widget (if `widget` is undefined) |
+|`widget` | `checkbox` | Use the [checkbox](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input/checkbox) widget. |
+|`widget` | `textarea` | Use the [textarea](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/textarea) widget. |
+|`widget` | `option` | Use the [select + option](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/select) widget. This parameter should have the `optionValues` field as well. |
+|`optionValues` | (Array) | Available option values used with the `option` widget |
+
+ +Here is an example. + +```js +parameter: { + // string type, input widget + 'xAxisUnit': { valueType: 'string', defaultValue: '', description: 'unit of xAxis', }, + + // boolean type, checkbox widget + 'inverted': { widget: 'checkbox', valueType: 'boolean', defaultValue: false, description: 'invert x and y axes', }, + + // string type, option widget with `optionValues` + 'graphType': { widget: 'option', valueType: 'string', defaultValue: 'line', description: 'graph type', optionValues: [ 'line', 'smoothedLine', 'step', ], }, + + // HTML in `description` + 'dateFormat': { valueType: 'string', defaultValue: '', description: 'format of date (doc) (e.g YYYY-MM-DD)', }, + + // JSON type, textarea widget + 'yAxisGuides': { widget: 'textarea', valueType: 'JSON', defaultValue: '', description: 'guides of yAxis ', }, +``` + +
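+
+Parameter values arrive in `render` alongside the chart name, already parsed according to their declared `valueType` (the automatic parsing mentioned in the overview). Here is a minimal sketch under that assumption, reusing the `xAxisUnit` and `lineWidth` parameters declared above:
+
+```js
+// minimal sketch: consuming parsed parameter values in `render`
+render(data) {
+  const { chart, parameter, transformer, } = data
+
+  if (chart === 'line') {
+    // `lineWidth` was declared with valueType 'int', so it is assumed to
+    // arrive as a number; `xAxisUnit` was declared as a string
+    const { xAxisUnit, lineWidth, } = parameter
+    const transformed = transformer()
+    // ... draw the line chart using `transformed`, `xAxisUnit` and `lineWidth`
+  }
+}
+```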
+
+### Spec: `transform`
+
+| Field Name | Available Values (type) | Description |
+| --- | --- | --- |
+|`method` | `object` | designed for rows requiring object manipulation |
+|`method` | `array` | designed for rows requiring array manipulation |
+|`method` | `array:2-key` | designed for xyz charts (e.g. bubble chart) |
+|`method` | `drill-down` | designed for drill-down charts |
+|`method` | `raw` | will return the original `tableData.rows` |
+
+
+Whatever you specify as `transform.method`, the `transformer` value will always be a function, for lazy computation.
+
+```js
+// advanced-transformation.util#getTransformer
+
+if (transformSpec.method === 'raw') {
+  transformer = () => { return rows; }
+} else if (transformSpec.method === 'array') {
+  transformer = () => {
+    ...
+    return { ... }
+  }
+}
+```
+
+Here is an example of actual usage.
+
+```js
+class AwesomeVisualization extends Visualization {
+  constructor(...) { /** setup your spec */ }
+
+  ...
+
+  // `render` will be called whenever `axis` or `parameter` are changed
+  render(data) {
+    const { chart, parameter, column, transformer, } = data
+
+    if (chart === 'line') {
+      const transformed = transformer()
+      // draw line chart
+    } else if (chart === 'no-group') {
+      const transformed = transformer()
+      // draw no-group chart
+    }
+  }
+
+  ...
+}
+```
+
diff --git a/docs/displaysystem/back-end-angular.md b/docs/displaysystem/back-end-angular.md
index d84a033d6fd..fb43ea41a6b 100644
--- a/docs/displaysystem/back-end-angular.md
+++ b/docs/displaysystem/back-end-angular.md
@@ -1,7 +1,7 @@
---
layout: page
-title: "Angular (backend API)"
-description: "Angular (backend API)"
+title: "Back-end Angular API in Apache Zeppelin"
+description: "Apache Zeppelin provides a gateway between your interpreter and your compiled AngularJS view templates. You can not only update scope variables from your interpreter but also watch them in the interpreter, which is a JVM process."
group: display
---

+{% include JB/setup %}
+
+## Building from Source
+
+
+If you want to build from source, you must first install the following dependencies:
+
+| Name | Value |
+| --- | --- |
+| Git | (Any Version) |
+| Maven | 3.1.x or higher |
+| JDK | 1.7 |
+
+
+If you haven't installed Git and Maven yet, check the [Build requirements](#build-requirements) section and follow the step by step instructions from there.
+
+
+#### 1. Clone the Apache Zeppelin repository
+
+```
+git clone https://github.com/apache/zeppelin.git
+```
+
+#### 2. Build source
+
+
+You can build Zeppelin with the following maven command:
+
+```
+mvn clean package -DskipTests [Options]
+```
+
+If you're unsure about the options, use the same commands that create the official binary package.
+
+```bash
+# update all pom.xml to use scala 2.11
+./dev/change_scala_version.sh 2.11
+# build zeppelin with all interpreters and include the latest version of Apache Spark support for local mode.
+mvn clean package -DskipTests -Pspark-2.0 -Phadoop-2.4 -Pr -Pscala-2.11
+```
+
+#### 3. Done
+After a successful build, you can start Zeppelin directly by running:
+
+```bash
+./bin/zeppelin-daemon.sh start
+```
+
+Check the [build-profiles](#build-profiles) section for further build options.
+If you are behind a proxy, follow the instructions in the [Proxy setting](#proxy-setting-optional) section.
+
+If you're interested in contributing, please check [Contributing to Apache Zeppelin (Code)](../development/howtocontribute.html) and [Contributing to Apache Zeppelin (Website)](../../development/contributewebsite.html).
+
+### Build profiles
+
+#### Spark Interpreter
+
+To build with a specific Spark version, Hadoop version or specific features, define one or more of the following profiles and options:
+
+##### `-Pspark-[version]`
+
+Set the Spark major version
+
+Available profiles are
+
+```
+-Pspark-2.1
+-Pspark-2.0
+-Pspark-1.6
+-Pspark-1.5
+-Pspark-1.4
+-Pcassandra-spark-1.5
+-Pcassandra-spark-1.4
+-Pcassandra-spark-1.3
+-Pcassandra-spark-1.2
+-Pcassandra-spark-1.1
+```
+
+The minor version can be adjusted with `-Dspark.version=x.x.x`
+
+
+##### `-Phadoop-[version]`
+
+Set the Hadoop major version
+
+Available profiles are
+
+```
+-Phadoop-0.23
+-Phadoop-1
+-Phadoop-2.2
+-Phadoop-2.3
+-Phadoop-2.4
+-Phadoop-2.6
+-Phadoop-2.7
+```
+
+The minor version can be adjusted with `-Dhadoop.version=x.x.x`
+
+##### `-Pscala-[version] (optional)`
+
+Set the Scala version (default 2.10)
+Available profiles are
+
+```
+-Pscala-2.10
+-Pscala-2.11
+```
+
+##### `-Pr` (optional)
+
+Enable [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration.
+
+##### `-Pvendor-repo` (optional)
+
+Enable the 3rd-party vendor repository (Cloudera)
+
+
+##### `-Pmapr[version]` (optional)
+
+For the MapR Hadoop Distribution, these profiles will handle the Hadoop version. As MapR allows different versions of Spark to be installed, you should specify which version of Spark is installed on the cluster by adding a Spark profile (`-Pspark-1.6`, `-Pspark-2.0`, etc.) as needed.
+The correct Maven artifacts can be found for every version of MapR at http://doc.mapr.com
+
+Available profiles are
+
+```
+-Pmapr3
+-Pmapr40
+-Pmapr41
+-Pmapr50
+-Pmapr51
+```
+
+#### -Pexamples (optional)
+
+Build the examples under the zeppelin-examples directory
+
+
+### Build command examples
+Here are some examples with several options:
+
+```bash
+# build with spark-2.1, scala-2.11
+./dev/change_scala_version.sh 2.11
+mvn clean package -Pspark-2.1 -Phadoop-2.4 -Pscala-2.11 -DskipTests
+
+# build with spark-2.0, scala-2.11
+./dev/change_scala_version.sh 2.11
+mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pscala-2.11 -DskipTests
+
+# build with spark-1.6, scala-2.10
+mvn clean package -Pspark-1.6 -Phadoop-2.4 -DskipTests
+
+# spark-cassandra integration
+mvn clean package -Pcassandra-spark-1.5 -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests
+
+# with CDH
+mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pvendor-repo -DskipTests
+
+# with MapR
+mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests
+```
+
+Ignite Interpreter
+
+```bash
+mvn clean package -Dignite.version=1.9.0 -DskipTests
+```
+
+Scalding Interpreter
+
+```bash
+mvn clean package -Pscalding -DskipTests
+```
+
+
+
+
+## Build requirements
+
+### Install requirements
+
+If you don't have the requirements prepared, install them.
+(The installation method may vary according to your environment; this example is for Ubuntu.)
+
+```
+sudo apt-get update
+sudo apt-get install git
+sudo apt-get install openjdk-7-jdk
+sudo apt-get install npm
+sudo apt-get install libfontconfig
+```
+
+
+
+### Install maven
+```
+wget http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
+sudo tar -zxf apache-maven-3.3.9-bin.tar.gz -C /usr/local/
+sudo ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/local/bin/mvn
+```
+
+_Notes:_
+ - Ensure node is installed by running `node --version`
+ - Ensure maven is running version 3.1.x or higher with `mvn -version`
+ - Configure maven to use more memory than usual by `export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=1024m"`
+
+
+
+## Proxy setting (optional)
+
+If you're behind a proxy, you'll need to configure maven and npm to pass through it.
+
+First of all, configure maven in your `~/.m2/settings.xml`.
+
+```
+<settings>
+  <proxies>
+    <proxy>
+      <id>proxy-http</id>
+      <active>true</active>
+      <protocol>http</protocol>
+      <host>localhost</host>
+      <port>3128</port>
+      <nonProxyHosts>localhost|127.0.0.1</nonProxyHosts>
+    </proxy>
+    <proxy>
+      <id>proxy-https</id>
+      <active>true</active>
+      <protocol>https</protocol>
+      <host>localhost</host>
+      <port>3128</port>
+      <nonProxyHosts>localhost|127.0.0.1</nonProxyHosts>
+    </proxy>
+  </proxies>
+</settings>
+```
+
+Then, the following commands will configure npm.
+
+```
+npm config set proxy http://localhost:3128
+npm config set https-proxy http://localhost:3128
+npm config set registry "http://registry.npmjs.org/"
+npm config set strict-ssl false
+```
+
+Configure git as well
+
+```
+git config --global http.proxy http://localhost:3128
+git config --global https.proxy http://localhost:3128
+git config --global url."http://".insteadOf git://
+```
+
+To clean up, set `<active>` to `false` in the Maven `settings.xml` and run these commands.
+
+```bash
+npm config rm proxy
+npm config rm https-proxy
+git config --global --unset http.proxy
+git config --global --unset https.proxy
+git config --global --unset url."http://".insteadOf
+```
+
+_Notes:_
+ - If you are behind an NTLM proxy you can use [Cntlm Authentication Proxy](http://cntlm.sourceforge.net/).
+ - Replace `localhost:3128` with the standard pattern `http://user:pwd@host:port`.
+
+
+## Package
+To package the final distribution including the compressed archive, run:
+
+```sh
+mvn clean package -Pbuild-distr
+```
+
+To build a distribution with specific profiles, run:
+
+```sh
+mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4
+```
+
+The profiles `-Pspark-1.5 -Phadoop-2.4` can be adjusted if you wish to build against specific Spark and Hadoop versions.
+
+The archive is generated under the _`zeppelin-distribution/target`_ directory
+
+## Run end-to-end tests
+Zeppelin comes with a set of end-to-end acceptance tests driving a headless selenium browser
+
+```sh
+# assumes zeppelin-server running on localhost:8080 (use -Durl=.. to override)
+mvn verify
+
+# or take care of starting/stopping zeppelin-server from the packaged zeppelin-distribution/target
+mvn verify -P using-packaged-distr
+```
+
+[![Analytics](https://ga-beacon.appspot.com/UA-45176241-4/apache/zeppelin/README.md?pixel)](https://github.com/igrigorik/ga-beacon)
diff --git a/docs/install/cdh.md b/docs/install/cdh.md
new file mode 100644
index 00000000000..f661417a3f6
--- /dev/null
+++ b/docs/install/cdh.md
@@ -0,0 +1,100 @@
+---
+layout: page
+title: "Apache Zeppelin on CDH"
+description: "This document will guide you through building and configuring the environment on CDH with Apache Zeppelin using docker scripts."
+group: install
+---
+
+{% include JB/setup %}
+
+# Apache Zeppelin on CDH
+
+
+### 1. Import Cloudera QuickStart Docker image
+
+>[Cloudera](http://www.cloudera.com/) officially provides a CDH image on Docker Hub. Please check [this guide page](http://www.cloudera.com/documentation/enterprise/latest/topics/quickstart_docker_container.html#cloudera_docker_container) for more information.
+
+You can import the Docker image by pulling it from the Cloudera Docker Hub.
+
+```
+docker pull cloudera/quickstart:latest
+```
+
+
+### 2. Run docker
+
+```
+docker run -it \
+ -p 80:80 \
+ -p 4040:4040 \
+ -p 8020:8020 \
+ -p 8022:8022 \
+ -p 8030:8030 \
+ -p 8032:8032 \
+ -p 8033:8033 \
+ -p 8040:8040 \
+ -p 8042:8042 \
+ -p 8088:8088 \
+ -p 8480:8480 \
+ -p 8485:8485 \
+ -p 8888:8888 \
+ -p 9083:9083 \
+ -p 10020:10020 \
+ -p 10033:10033 \
+ -p 18088:18088 \
+ -p 19888:19888 \
+ -p 25000:25000 \
+ -p 25010:25010 \
+ -p 25020:25020 \
+ -p 50010:50010 \
+ -p 50020:50020 \
+ -p 50070:50070 \
+ -p 50075:50075 \
+ -h quickstart.cloudera --privileged=true \
+ agitated_payne_backup /usr/bin/docker-quickstart;
+```
+
+### 3. Verify running CDH
+
+To verify the application is running well, check the web UI for HDFS on `http://<hostname>:50070/` and YARN on `http://<hostname>:8088/cluster`.
+
+
+### 4. Configure Spark interpreter in Zeppelin
+Set the following configurations in `conf/zeppelin-env.sh`.
+
+```
+export MASTER=yarn-client
+export HADOOP_CONF_DIR=[your_hadoop_conf_path]
+export SPARK_HOME=[your_spark_home_path]
+```
+
+`HADOOP_CONF_DIR` (the Hadoop configuration path) is defined in `/scripts/docker/spark-cluster-managers/cdh/hdfs_conf`.
+
+Don't forget to set the Spark `master` as `yarn-client` on the Zeppelin **Interpreters** setting page, as below.
+
+
+
+### 5. Run Zeppelin with Spark interpreter
+After running a single paragraph with the Spark interpreter in Zeppelin,
+
+
+
+browse `http://<hostname>:8088/cluster/apps` to check whether the Zeppelin application is running well.
+
+
+
+diff --git a/docs/install/configuration.md b/docs/install/configuration.md
new file mode 100644
index 00000000000..a35eb91594c
--- /dev/null
+++ b/docs/install/configuration.md
@@ -0,0 +1,433 @@
+---
+layout: page
+title: "Apache Zeppelin Configuration"
+description: "This page will guide you to configure Apache Zeppelin using either environment variables or Java properties. Also, you can configure SSL for Zeppelin."
+group: install
+---
+
+{% include JB/setup %}
+
+# Apache Zeppelin Configuration
+
+
+## Zeppelin Properties
+There are two locations where you can configure Apache Zeppelin.
+
+* **Environment variables** can be defined in `conf/zeppelin-env.sh` (`conf\zeppelin-env.cmd` for Windows).
+* **Java properties** can be defined in `conf/zeppelin-site.xml`.
+
+If both are defined, then the **environment variables** will take priority.
+> Hover over a property and click it to get a direct link to that property.
+
| zeppelin-env.sh | zeppelin-site.xml | Default value | Description |
| --- | --- | --- | --- |
| ZEPPELIN_PORT | zeppelin.server.port | 8080 | Zeppelin server port. Note: Please make sure you're not using the same port as the Zeppelin web application development port (default: 9000). |
| ZEPPELIN_SSL_PORT | zeppelin.server.ssl.port | 8443 | Zeppelin Server ssl port (used when the ssl environment/property is set to true) |
| ZEPPELIN_MEM | N/A | -Xmx1024m -XX:MaxPermSize=512m | JVM mem options |
| ZEPPELIN_INTP_MEM | N/A | ZEPPELIN_MEM | JVM mem options for interpreter process |
| ZEPPELIN_JAVA_OPTS | N/A | | JVM options |
| ZEPPELIN_ALLOWED_ORIGINS | zeppelin.server.allowed.origins | * | Enables a way to specify a ','-separated list of allowed origins for REST and websockets, e.g. http://localhost:8080 |
| N/A | zeppelin.anonymous.allowed | true | The anonymous user is allowed by default. |
| ZEPPELIN_SERVER_CONTEXT_PATH | zeppelin.server.context.path | / | Context path of the web application |
| ZEPPELIN_SSL | zeppelin.ssl | false | |
| ZEPPELIN_SSL_CLIENT_AUTH | zeppelin.ssl.client.auth | false | |
| ZEPPELIN_SSL_KEYSTORE_PATH | zeppelin.ssl.keystore.path | keystore | |
| ZEPPELIN_SSL_KEYSTORE_TYPE | zeppelin.ssl.keystore.type | JKS | |
| ZEPPELIN_SSL_KEYSTORE_PASSWORD | zeppelin.ssl.keystore.password | | |
| ZEPPELIN_SSL_KEY_MANAGER_PASSWORD | zeppelin.ssl.key.manager.password | | |
| ZEPPELIN_SSL_TRUSTSTORE_PATH | zeppelin.ssl.truststore.path | | |
| ZEPPELIN_SSL_TRUSTSTORE_TYPE | zeppelin.ssl.truststore.type | | |
| ZEPPELIN_SSL_TRUSTSTORE_PASSWORD | zeppelin.ssl.truststore.password | | |
| ZEPPELIN_NOTEBOOK_HOMESCREEN | zeppelin.notebook.homescreen | | Display note IDs on the Apache Zeppelin homescreen, e.g. 2A94M5J1Z |
| ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE | zeppelin.notebook.homescreen.hide | false | Hide the note ID set by ZEPPELIN_NOTEBOOK_HOMESCREEN on the Apache Zeppelin homescreen. For further information, please read Customize your Zeppelin homepage. |
| ZEPPELIN_WAR_TEMPDIR | zeppelin.war.tempdir | webapps | Location of the jetty temporary directory |
| ZEPPELIN_NOTEBOOK_DIR | zeppelin.notebook.dir | notebook | The root directory where notebook directories are saved |
| ZEPPELIN_NOTEBOOK_S3_BUCKET | zeppelin.notebook.s3.bucket | zeppelin | S3 Bucket where notebook files will be saved |
| ZEPPELIN_NOTEBOOK_S3_USER | zeppelin.notebook.s3.user | user | User name of an S3 bucket, e.g. bucket/user/notebook/2A94M5J1Z/note.json |
| ZEPPELIN_NOTEBOOK_S3_ENDPOINT | zeppelin.notebook.s3.endpoint | s3.amazonaws.com | Endpoint for the bucket |
| ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID | zeppelin.notebook.s3.kmsKeyID | | AWS KMS Key ID to use for encrypting data in S3 (optional) |
| ZEPPELIN_NOTEBOOK_S3_EMP | zeppelin.notebook.s3.encryptionMaterialsProvider | | Class name of a custom S3 encryption materials provider implementation to use for encrypting data in S3 (optional) |
| ZEPPELIN_NOTEBOOK_S3_SSE | zeppelin.notebook.s3.sse | false | Save notebooks to S3 with server-side encryption enabled |
| ZEPPELIN_NOTEBOOK_AZURE_CONNECTION_STRING | zeppelin.notebook.azure.connectionString | | The Azure storage account connection string, e.g. DefaultEndpointsProtocol=https;AccountName=&lt;accountName&gt;;AccountKey=&lt;accountKey&gt; |
| ZEPPELIN_NOTEBOOK_AZURE_SHARE | zeppelin.notebook.azure.share | zeppelin | Azure Share where the notebook files will be saved |
| ZEPPELIN_NOTEBOOK_AZURE_USER | zeppelin.notebook.azure.user | user | Optional user name of an Azure file share, e.g. share/user/notebook/2A94M5J1Z/note.json |
| ZEPPELIN_NOTEBOOK_STORAGE | zeppelin.notebook.storage | org.apache.zeppelin.notebook.repo.GitNotebookRepo | Comma separated list of notebook storage locations |
| ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC | zeppelin.notebook.one.way.sync | false | If there are multiple notebook storage locations, should we treat the first one as the only source of truth? |
| ZEPPELIN_NOTEBOOK_PUBLIC | zeppelin.notebook.public | true | Make notebooks public (set only owners) by default when created/imported. If set to false, the user is added to readers and writers as well, making the note private and invisible to other users unless permissions are granted. |
| ZEPPELIN_INTERPRETERS | zeppelin.interpreters | org.apache.zeppelin.spark.SparkInterpreter, org.apache.zeppelin.spark.PySparkInterpreter, org.apache.zeppelin.spark.SparkSqlInterpreter, org.apache.zeppelin.spark.DepInterpreter, org.apache.zeppelin.markdown.Markdown, org.apache.zeppelin.shell.ShellInterpreter, ... | Comma separated interpreter configurations [Class]. NOTE: This property is deprecated since Zeppelin-0.6.0 and will not be supported from Zeppelin-0.7.0. |
| ZEPPELIN_INTERPRETER_DIR | zeppelin.interpreter.dir | interpreter | Interpreter directory |
| ZEPPELIN_INTERPRETER_DEP_MVNREPO | zeppelin.interpreter.dep.mvnRepo | http://repo1.maven.org/maven2/ | Remote principal repository for interpreter's additional dependency loading |
| ZEPPELIN_INTERPRETER_OUTPUT_LIMIT | zeppelin.interpreter.output.limit | 102400 | Output message from interpreter exceeding the limit will be truncated |
| ZEPPELIN_INTERPRETER_CONNECT_TIMEOUT | zeppelin.interpreter.connect.timeout | 30000 | Interpreter process connect timeout in msec. |
| ZEPPELIN_DEP_LOCALREPO | zeppelin.dep.localrepo | local-repo | Local repository for dependency loader, e.g. visualization modules of npm. |
| ZEPPELIN_HELIUM_NPM_REGISTRY | zeppelin.helium.npm.registry | http://registry.npmjs.org/ | Remote npm registry for Helium dependency loader |
| ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE | zeppelin.websocket.max.text.message.size | 1024000 | Size (in characters) of the maximum text message that can be received by websocket. |
| ZEPPELIN_SERVER_DEFAULT_DIR_ALLOWED | zeppelin.server.default.dir.allowed | false | Enable directory listings on server. |
+
+
+## SSL Configuration
+
+Enabling SSL requires a few configuration changes. First, you need to create certificates and then update the necessary configurations to enable server side SSL and/or client side certificate authentication.
+
+### Creating and configuring the Certificates
+
+Information about how to generate certificates and a keystore can be found [here](https://wiki.eclipse.org/Jetty/Howto/Configure_SSL).
+
+A condensed example can be found in the top answer to this [StackOverflow post](http://stackoverflow.com/questions/4008837/configure-ssl-on-jetty).
+
+The keystore holds the private key and certificate on the server end. The truststore holds the trusted client certificates. Be sure that the path and password for these two stores are correctly configured in the password fields below. They can be obfuscated using the Jetty password tool. After Maven pulls in all the dependencies to build Zeppelin, one of the Jetty jars contains the Password tool. Invoke this command from the Zeppelin home build directory with the appropriate version, user, and password.
+
+```
+java -cp ./zeppelin-server/target/lib/jetty-all-server-<version>.jar org.eclipse.jetty.util.security.Password <user> <password>
+```
+
+If you are using a self-signed certificate, a certificate signed by an untrusted CA, or if client authentication is enabled, then the client must have a browser create exceptions for both the normal HTTPS port and the WebSocket port. This can be done by trying to establish an HTTPS connection to both ports in a browser (e.g. if the ports are 443 and 8443, then visit https://127.0.0.1:443 and https://127.0.0.1:8443). This step can be skipped if the server certificate is signed by a trusted CA and client auth is disabled.
+
+### Configuring server side SSL
+
+The following properties need to be updated in `zeppelin-site.xml` in order to enable server side SSL.
+
+```
+<property>
+  <name>zeppelin.server.ssl.port</name>
+  <value>8443</value>
+  <description>Server ssl port. (used when ssl property is set to true)</description>
+</property>
+
+<property>
+  <name>zeppelin.ssl</name>
+  <value>true</value>
+  <description>Should SSL be used by the servers?</description>
+</property>
+
+<property>
+  <name>zeppelin.ssl.keystore.path</name>
+  <value>keystore</value>
+  <description>Path to keystore relative to Zeppelin configuration directory</description>
+</property>
+
+<property>
+  <name>zeppelin.ssl.keystore.type</name>
+  <value>JKS</value>
+  <description>The format of the given keystore (e.g. JKS or PKCS12)</description>
+</property>
+
+<property>
+  <name>zeppelin.ssl.keystore.password</name>
+  <value>change me</value>
+  <description>Keystore password. Can be obfuscated by the Jetty Password tool</description>
+</property>
+
+<property>
+  <name>zeppelin.ssl.key.manager.password</name>
+  <value>change me</value>
+  <description>Key Manager password. Defaults to keystore password. Can be obfuscated.</description>
+</property>
+```
+
+
+### Enabling client side certificate authentication
+
+The following properties need to be updated in `zeppelin-site.xml` in order to enable client side certificate authentication.
+
+```
+<property>
+  <name>zeppelin.server.ssl.port</name>
+  <value>8443</value>
+  <description>Server ssl port. (used when ssl property is set to true)</description>
+</property>
+
+<property>
+  <name>zeppelin.ssl.client.auth</name>
+  <value>true</value>
+  <description>Should client authentication be used for SSL connections?</description>
+</property>
+
+<property>
+  <name>zeppelin.ssl.truststore.path</name>
+  <value>truststore</value>
+  <description>Path to truststore relative to Zeppelin configuration directory. Defaults to the keystore path</description>
+</property>
+
+<property>
+  <name>zeppelin.ssl.truststore.type</name>
+  <value>JKS</value>
+  <description>The format of the given truststore (e.g. JKS or PKCS12). Defaults to the same type as the keystore type</description>
+</property>
+
+<property>
+  <name>zeppelin.ssl.truststore.password</name>
+  <value>change me</value>
+  <description>Truststore password. Can be obfuscated by the Jetty Password tool. Defaults to the keystore password</description>
+</property>
+```
+
+
+### Obfuscating Passwords using the Jetty Password Tool
+
+Security best practices advise against using plain text passwords, and Jetty provides a password tool to help obfuscate the passwords used to access the KeyStore and TrustStore.
+
+The Password tool documentation can be found [here](http://www.eclipse.org/jetty/documentation/current/configuring-security-secure-passwords.html).
+
+After using the tool:
+
+```
+java -cp $ZEPPELIN_HOME/zeppelin-server/target/lib/jetty-util-9.2.15.v20160210.jar \
+    org.eclipse.jetty.util.security.Password \
+    password
+
+2016-12-15 10:46:47.931:INFO::main: Logging initialized @101ms
+password
+OBF:1v2j1uum1xtv1zej1zer1xtn1uvk1v1v
+MD5:5f4dcc3b5aa765d61d8327deb882cf99
+```
+
+Update your configuration with the obfuscated password:
+
+```
+<property>
+  <name>zeppelin.ssl.keystore.password</name>
+  <value>OBF:1v2j1uum1xtv1zej1zer1xtn1uvk1v1v</value>
+  <description>Keystore password. Can be obfuscated by the Jetty Password tool</description>
+</property>
+```
+
+
+**Note:** After updating these configurations, the Zeppelin server needs to be restarted.
diff --git a/docs/install/docker.md b/docs/install/docker.md
new file mode 100644
index 00000000000..6b1309ad7de
--- /dev/null
+++ b/docs/install/docker.md
@@ -0,0 +1,61 @@
+---
+layout: page
+title: "Apache Zeppelin Releases Docker Images"
+description: "This document contains instructions about making docker containers for Zeppelin. It mainly provides guidance into how to create, publish and run docker images for zeppelin releases."
+group: install
+---
+
+{% include JB/setup %}
+
+# Docker Image for Apache Zeppelin Releases
+
+
+## Overview
+This document contains instructions about making docker containers for Zeppelin. It mainly provides guidance into how to create, publish and run docker images for zeppelin releases.
+
+## Quick Start
+
+### Installing Docker
+You need to [install docker](https://docs.docker.com/engine/installation/) on your machine.
+
+### Running docker image
+
+```
+docker run -p 8080:8080 --rm --name zeppelin apache/zeppelin:<release-version>
+```
+
+* Zeppelin will run at `http://localhost:8080`.
+
+If you want to specify the `logs` and `notebook` dirs,
+
+```
+docker run -p 8080:8080 --rm \
+-v $PWD/logs:/logs \
+-v $PWD/notebook:/notebook \
+-e ZEPPELIN_LOG_DIR='/logs' \
+-e ZEPPELIN_NOTEBOOK_DIR='/notebook' \
+--name zeppelin apache/zeppelin:<release-version> # e.g '0.7.1'
+```
+
+### Building dockerfile locally
+
+```
+cd $ZEPPELIN_HOME
+cd scripts/docker/zeppelin/bin
+
+docker build -t my-zeppelin:my-tag ./
+```
+
+diff --git a/docs/install/install.md b/docs/install/install.md
index 92ece741731..506499342ed 100644
--- a/docs/install/install.md
+++ b/docs/install/install.md
@@ -1,7 +1,7 @@
---
layout: page
-title: "Getting Started"
-description: ""
+title: "Quick Start"
+description: "This page will help you get started and will guide you through installing Apache Zeppelin and running it in the command line."
group: install
---

+{% include JB/setup %}
+
+# Apache Zeppelin on Spark Cluster Mode
+
diff --git a/docs/install/install.md b/docs/install/install.md index 92ece741731..506499342ed 100644 --- a/docs/install/install.md +++ b/docs/install/install.md @@ -1,7 +1,7 @@
 ---
 layout: page
-title: "Getting Started"
-description: ""
+title: "Quick Start"
+description: "This page will help you get started and will guide you through installing Apache Zeppelin and running it in the command line."
 group: install
 ---
+{% include JB/setup %}
+
+# Apache Zeppelin on Spark Cluster Mode

## Overview
[Apache Spark](http://spark.apache.org/) supports three cluster manager types ([Standalone](http://spark.apache.org/docs/latest/spark-standalone.html), [Apache Mesos](http://spark.apache.org/docs/latest/running-on-mesos.html) and [Hadoop YARN](http://spark.apache.org/docs/latest/running-on-yarn.html)) so far.
This document guides you through building and configuring an environment for each of the three types of Spark cluster managers with Apache Zeppelin using [Docker](https://www.docker.com/) scripts.
So [install docker](https://docs.docker.com/engine/installation/) on the machine first.

## Spark standalone mode
[Spark standalone](http://spark.apache.org/docs/latest/spark-standalone.html) is a simple cluster manager included with Spark that makes it easy to set up a cluster.
You can simply set up a Spark standalone environment with the steps below.

> **Note :** Since Apache Zeppelin and Spark use the same `8080` port for their web UI, you might need to change `zeppelin.server.port` in `conf/zeppelin-site.xml`.

### 1. Build Docker file
You can find docker script files under `scripts/docker/spark-cluster-managers`.

```
cd $ZEPPELIN_HOME/scripts/docker/spark-cluster-managers/spark_standalone
docker build -t "spark_standalone" .
```

### 2. Run docker

```
docker run -it \
-p 8080:8080 \
-p 7077:7077 \
-p 8888:8888 \
-p 8081:8081 \
-h sparkmaster \
--name spark_standalone \
spark_standalone bash;
```

Note that the `sparkmaster` hostname used here to run the docker container should be defined in your `/etc/hosts`.

### 3. Configure Spark interpreter in Zeppelin
Set the Spark master as `spark://<hostname>:7077` in the Zeppelin **Interpreters** setting page.

### 4. Run Zeppelin with Spark interpreter
After running a single paragraph with the Spark interpreter in Zeppelin, browse `http://<hostname>:8080` and check whether the Spark cluster is running well or not.

You can also simply verify that Spark is running well in Docker with the below command.

```
ps -ef | grep spark
```
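The "single paragraph" above can be as simple as the following sketch (a hypothetical example); it should show up as a running application in the Spark master UI:

```
%spark
// a trivial job that forces the cluster to schedule some work
sc.parallelize(1 to 1000).sum()
```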
## Spark on YARN mode
You can simply set up a [Spark on YARN](http://spark.apache.org/docs/latest/running-on-yarn.html) docker environment with the steps below.

> **Note :** Since Apache Zeppelin and Spark use the same `8080` port for their web UI, you might need to change `zeppelin.server.port` in `conf/zeppelin-site.xml`.

### 1. Build Docker file
You can find docker script files under `scripts/docker/spark-cluster-managers`.

```
cd $ZEPPELIN_HOME/scripts/docker/spark-cluster-managers/spark_yarn_cluster
docker build -t "spark_yarn" .
```

### 2. Run docker

```
docker run -it \
 -p 5000:5000 \
 -p 9000:9000 \
 -p 9001:9001 \
 -p 8088:8088 \
 -p 8042:8042 \
 -p 8030:8030 \
 -p 8031:8031 \
 -p 8032:8032 \
 -p 8033:8033 \
 -p 8080:8080 \
 -p 7077:7077 \
 -p 8888:8888 \
 -p 8081:8081 \
 -p 50010:50010 \
 -p 50075:50075 \
 -p 50020:50020 \
 -p 50070:50070 \
 --name spark_yarn \
 -h sparkmaster \
 spark_yarn bash;
```

Note that the `sparkmaster` hostname used here to run the docker container should be defined in your `/etc/hosts`.

### 3. Verify running Spark on YARN.

You can simply verify that the processes of Spark and YARN are running well in Docker with the below command.

```
ps -ef
```

You can also check each application web UI: HDFS on `http://<hostname>:50070/`, YARN on `http://<hostname>:8088/cluster` and Spark on `http://<hostname>:8080/`.

### 4. Configure Spark interpreter in Zeppelin
Set the following configurations in `conf/zeppelin-env.sh`.

```
export MASTER=yarn-client
export HADOOP_CONF_DIR=[your_hadoop_conf_path]
export SPARK_HOME=[your_spark_home_path]
```

`HADOOP_CONF_DIR` (Hadoop configuration path) is defined in `/scripts/docker/spark-cluster-managers/spark_yarn_cluster/hdfs_conf`.

Don't forget to set the Spark `master` as `yarn-client` in the Zeppelin **Interpreters** setting page, like below.

### 5. Run Zeppelin with Spark interpreter
After running a single paragraph with the Spark interpreter in Zeppelin, browse `http://<hostname>:8088/cluster/apps` and check whether the Zeppelin application is running well or not.
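To double-check which master the interpreter actually connected to, a paragraph like this (a hypothetical example) is handy; with the configuration above it should print `yarn-client`:

```
%spark
sc.master
```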
## Spark on Mesos mode
You can simply set up a [Spark on Mesos](http://spark.apache.org/docs/latest/running-on-mesos.html) docker environment with the steps below.

### 1. Build Docker file

```
cd $ZEPPELIN_HOME/scripts/docker/spark-cluster-managers/spark_mesos
docker build -t "spark_mesos" .
```

### 2. Run docker

```
docker run --net=host -it \
-p 8080:8080 \
-p 7077:7077 \
-p 8888:8888 \
-p 8081:8081 \
-p 8082:8082 \
-p 5050:5050 \
-p 5051:5051 \
-p 4040:4040 \
-h sparkmaster \
--name spark_mesos \
spark_mesos bash;
```

Note that the `sparkmaster` hostname used here to run the docker container should be defined in your `/etc/hosts`.

### 3. Verify running Spark on Mesos.

You can simply verify that the processes of Spark and Mesos are running well in Docker with the below command.

```
ps -ef
```

You can also check each application web UI: Mesos on `http://<hostname>:5050/cluster` and Spark on `http://<hostname>:8080/`.

### 4. Configure Spark interpreter in Zeppelin

```
export MASTER=mesos://127.0.1.1:5050
export MESOS_NATIVE_JAVA_LIBRARY=[PATH OF libmesos.so]
export SPARK_HOME=[PATH OF SPARK HOME]
```

Don't forget to set the Spark `master` as `mesos://127.0.1.1:5050` in the Zeppelin **Interpreters** setting page, like below.

### 5. Run Zeppelin with Spark interpreter
After running a single paragraph with the Spark interpreter in Zeppelin, browse `http://<hostname>:5050/#/frameworks` and check whether the Zeppelin application is running well or not.

### Troubleshooting for Spark on Mesos

- If you have a problem with the hostname, use the `--add-host` option when executing `docker run`

```
## use `--add-host=moby:127.0.0.1` option to resolve
## since docker container couldn't resolve `moby`

: java.net.UnknownHostException: moby: moby: Name or service not known
 at java.net.InetAddress.getLocalHost(InetAddress.java:1496)
 at org.apache.spark.util.Utils$.findLocalInetAddress(Utils.scala:789)
 at org.apache.spark.util.Utils$.org$apache$spark$util$Utils$$localIpAddress$lzycompute(Utils.scala:782)
 at org.apache.spark.util.Utils$.org$apache$spark$util$Utils$$localIpAddress(Utils.scala:782)
```

- If you have a problem with the mesos master, try `mesos://127.0.0.1` instead of `mesos://127.0.1.1`

```
I0103 20:17:22.329269 340 sched.cpp:330] New master detected at master@127.0.1.1:5050
I0103 20:17:22.330749 340 sched.cpp:341] No credentials provided. Attempting to register without authentication
W0103 20:17:22.333531 340 sched.cpp:736] Ignoring framework registered message because it was sent from 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
W0103 20:17:24.040252 339 sched.cpp:736] Ignoring framework registered message because it was sent from 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
W0103 20:17:26.150250 339 sched.cpp:736] Ignoring framework registered message because it was sent from 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
W0103 20:17:26.737604 339 sched.cpp:736] Ignoring framework registered message because it was sent from 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
W0103 20:17:35.241714 336 sched.cpp:736] Ignoring framework registered message because it was sent from 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
```
\ No newline at end of file
diff --git a/docs/install/upgrade.md b/docs/install/upgrade.md index d5642869126..61d2d68792d 100644 --- a/docs/install/upgrade.md +++ b/docs/install/upgrade.md @@ -1,7 +1,7 @@
 ---
 layout: page
-title: "Manual upgrade procedure for Zeppelin"
-description: ""
+title: "Manual Zeppelin version upgrade procedure"
+description: "This document will guide you through the procedure of manually upgrading your Apache Zeppelin instance to a newer version. Apache Zeppelin keeps backward compatibility for the notebook file format."
 group: install
 ---
 {% include JB/setup %}
-# Vagrant Virtual Machine for Apache Zeppelin
+# Apache Zeppelin on Vagrant Virtual Machine
## Overview -Apache Zeppelin distribution includes a scripts directory +Apache Zeppelin distribution includes a script directory `scripts/vagrant/zeppelin-dev` -This script creates a virtual machine that launches a repeatable, known set of core dependencies required for developing Zeppelin. It can also be used to run an existing Zeppelin build if you don't plan to build from source. +This script creates a virtual machine that launches a repeatable, known set of core dependencies required for developing Zeppelin. It can also be used to run an existing Zeppelin build if you don't plan to build from source. For PySpark users, this script includes several helpful [Python Libraries](#python-extras). For SparkR users, this script includes several helpful [R Libraries](#r-extras). @@ -75,7 +75,7 @@ into a directory on your host machine, or directly in your virtual machine. Cloning Zeppelin into the `/scripts/vagrant/zeppelin-dev` directory from the host, will allow the directory to be shared between your host and the guest machine. -Cloning the project again may seem counter intuitive, since this script likley originated from the project repository. Consider copying just the vagrant/zeppelin-dev script from the Zeppelin project as a stand alone directory, then once again clone the specific branch you wish to build. +Cloning the project again may seem counter intuitive, since this script likely originated from the project repository. Consider copying just the vagrant/zeppelin-dev script from the Zeppelin project as a stand alone directory, then once again clone the specific branch you wish to build. Synced folders enable Vagrant to sync a folder on the host machine to the guest machine, allowing you to continue working on your project's files on your host machine, but use the resources in the guest machine to compile or run your project. 
_[(1) Synced Folder Description from Vagrant Up](https://docs.vagrantup.com/v2/synced-folders/index.html)_ @@ -88,7 +88,7 @@ By default, Vagrant will share your project directory (the directory with the Va Running the following commands in the guest machine should display these expected versions: `node --version` should report *v0.12.7* -`mvn --version` should report *Apache Maven 3.3.3* and *Java version: 1.7.0_85* +`mvn --version` should report *Apache Maven 3.3.9* and *Java version: 1.7.0_85* The virtual machine consists of: @@ -96,7 +96,7 @@ The virtual machine consists of: - Node.js 0.12.7 - npm 2.11.3 - ruby 1.9.3 + rake, make and bundler (only required if building jekyll documentation) - - Maven 3.3.3 + - Maven 3.3.9 - Git - Unzip - libfontconfig to avoid phatomJs missing dependency issues @@ -110,7 +110,7 @@ This assumes you've already cloned the project either on the host machine in the ``` cd /zeppelin -mvn clean package -Pspark-1.6 -Ppyspark -Phadoop-2.4 -Psparkr -DskipTests +mvn clean package -Pspark-1.6 -Phadoop-2.4 -DskipTests ./bin/zeppelin-daemon.sh start ``` @@ -163,7 +163,7 @@ import matplotlib.pyplot as plt import numpy as np import StringIO -# clear out any previous plots on this notebook +# clear out any previous plots on this note plt.clf() def show(p): diff --git a/docs/install/yarn_install.md b/docs/install/yarn_install.md index 466007d2daa..e2427546a66 100644 --- a/docs/install/yarn_install.md +++ b/docs/install/yarn_install.md @@ -1,7 +1,7 @@ --- layout: page title: "Install Zeppelin to connect with existing YARN cluster" -description: "" +description: "This page describes how to pre-configure a bare metal node, configure Apache Zeppelin and connect it to existing YARN cluster running Hortonworks flavour of Hadoop." group: install --- {% include JB/setup %} # Alluxio Interpreter for Apache Zeppelin diff --git a/docs/interpreter/beam.md b/docs/interpreter/beam.md new file mode 100644 index 00000000000..cbcd5e37d51 --- /dev/null +++ b/docs/interpreter/beam.md @@ -0,0 +1,124 @@ +--- +layout: page +title: Beam interpreter in Apache Zeppelin +description: Apache Beam is an open source, unified programming model that you can use to create a data processing pipeline. +group: interpreter +--- + + +{% include JB/setup %} + +# Beam interpreter for Apache Zeppelin + +
## Overview
[Apache Beam](http://beam.incubator.apache.org) is an open source unified platform for data processing pipelines. A pipeline can be built using one of the Beam SDKs.
The execution of the pipeline is done by different Runners. Currently, Beam supports the Apache Flink Runner, Apache Spark Runner, and Google Dataflow Runner.

## How to use
Basically, you can write normal Beam Java code in which you determine the Runner. You should write the main method inside a class, because the interpreter invokes this main method to execute the pipeline. Unlike Zeppelin's normal pattern, each paragraph is considered a separate job; there isn't any relation to any other paragraph.

The following is a demonstration of a word count example with the data represented as an array of strings.
It can also read data from files by replacing `Create.of(SENTENCES).withCoder(StringUtf8Coder.of())` with `TextIO.Read.from("path/to/filename.txt")`.

```java
%beam

// most used imports
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.transforms.Create;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.ArrayList;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.*;
import org.apache.spark.SparkContext;
import org.apache.beam.runners.direct.*;
import org.apache.beam.sdk.runners.*;
import org.apache.beam.sdk.options.*;
import org.apache.beam.runners.spark.*;
import org.apache.beam.runners.spark.io.ConsoleIO;
import org.apache.beam.runners.flink.*;
import org.apache.beam.runners.flink.examples.WordCount.Options;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.options.PipelineOptions;

public class MinimalWordCount {
  static List<String> s = new ArrayList<>();

  static final String[] SENTENCES_ARRAY = new String[] {
    "Hadoop is the Elephant King!",
    "A yellow and elegant thing.",
    "He never forgets",
    "Useful data, or lets",
    "An extraneous element cling!",
    "A wonderful king is Hadoop.",
    "The elephant plays well with Sqoop.",
    "But what helps him to thrive",
    "Are Impala, and Hive,",
    "And HDFS in the group.",
    "Hadoop is an elegant fellow.",
    "An elephant gentle and mellow.",
    "He never gets mad,",
    "Or does anything bad,",
    "Because, at his core, he is yellow",
  };
  static final List<String> SENTENCES = Arrays.asList(SENTENCES_ARRAY);

  public static void main(String[] args) {
    Options options = PipelineOptionsFactory.create().as(Options.class);
    options.setRunner(FlinkRunner.class);
    Pipeline p = Pipeline.create(options);
    p.apply(Create.of(SENTENCES).withCoder(StringUtf8Coder.of()))
     .apply("ExtractWords", ParDo.of(new DoFn<String, String>() {
       @Override
       public void processElement(ProcessContext c) {
         for (String word : c.element().split("[^a-zA-Z']+")) {
           if (!word.isEmpty()) {
             c.output(word);
           }
         }
       }
     }))
     .apply(Count.<String>perElement())
     .apply("FormatResults", ParDo.of(new DoFn<KV<String, Long>, String>() {
       @Override
       public void processElement(DoFn<KV<String, Long>, String>.ProcessContext arg0)
           throws Exception {
         s.add("\n" + arg0.element().getKey() + "\t" + arg0.element().getValue());
       }
     }));
    p.run();
    System.out.println("%table word\tcount");
    for (int i = 0; i < s.size(); i++) {
      System.out.print(s.get(i));
    }
  }
}
```
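To run the same pipeline on another engine, only the runner option changes. A sketch, assuming the Spark runner is on the interpreter classpath (its package is already imported above):

```java
// execute with the Spark runner instead of the Flink runner
options.setRunner(SparkRunner.class);
```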
diff --git a/docs/interpreter/bigquery.md b/docs/interpreter/bigquery.md index 1e92aa98757..7ebe2e2fda8 100644 --- a/docs/interpreter/bigquery.md +++ b/docs/interpreter/bigquery.md @@ -1,9 +1,23 @@
 ---
 layout: page
-title: "BigQuery Interpreter"
-description: ""
+title: "BigQuery Interpreter for Apache Zeppelin"
+description: "BigQuery is a highly scalable no-ops data warehouse in the Google Cloud Platform."
 group: interpreter
 ---
+
+{% include JB/setup %}
 # BigQuery Interpreter for Apache Zeppelin
diff --git a/docs/interpreter/cassandra.md b/docs/interpreter/cassandra.md index 33cff199b05..5d8929bd588 100644 --- a/docs/interpreter/cassandra.md +++ b/docs/interpreter/cassandra.md @@ -1,9 +1,22 @@
 ---
 layout: page
-title: "Cassandra Interpreter"
-description: "Cassandra Interpreter"
-group: manual
+title: "Cassandra CQL Interpreter for Apache Zeppelin"
+description: "Apache Cassandra database is the right choice when you need scalability and high availability without compromising performance."
+group: interpreter
 ---
+
 {% include JB/setup %}
 # Cassandra CQL Interpreter for Apache Zeppelin
diff --git a/docs/interpreter/elasticsearch.md b/docs/interpreter/elasticsearch.md index 4721bcda3f2..165116b9e9c 100644 --- a/docs/interpreter/elasticsearch.md +++ b/docs/interpreter/elasticsearch.md @@ -1,9 +1,22 @@
 ---
 layout: page
-title: "Elasticsearch Interpreter"
-description: ""
-group: manual
+title: "Elasticsearch Interpreter for Apache Zeppelin"
+description: "Elasticsearch is a highly scalable open-source full-text search and analytics engine."
+group: interpreter
 ---
+
 {% include JB/setup %}
 # Elasticsearch Interpreter for Apache Zeppelin
@@ -33,7 +46,22 @@ group: manual
   elasticsearch.port
   9300
-  Connection port ( Important: this is not the HTTP port, but the transport port )
+  Connection port ( Important: it depends on the client type, transport or http)
+
+  elasticsearch.client.type
+  transport
+  The type of client for Elasticsearch (transport or http) ( Important: the port depends on this value)
+
+  elasticsearch.basicauth.username
+
+  Username for a basic authentication (http)
+
+  elasticsearch.basicauth.password
+
+  Password for a basic authentication (http)
   elasticsearch.result.size
@@ -230,7 +258,7 @@ delete /index/type/id
 ```
 ### Apply Zeppelin Dynamic Forms
-You can leverage [Zeppelin Dynamic Form]({{BASE_PATH}}/manual/dynamicform.html) inside your queries. You can use both the `text input` and `select form` parameterization features.
+You can leverage [Zeppelin Dynamic Form](../manual/dynamicform.html) inside your queries. You can use both the `text input` and `select form` parameterization features.
 ```bash
 %elasticsearch
diff --git a/docs/interpreter/flink.md b/docs/interpreter/flink.md index a678480b59f..2cf31257ad6 100644 --- a/docs/interpreter/flink.md +++ b/docs/interpreter/flink.md @@ -1,9 +1,22 @@
 ---
 layout: page
-title: "Flink Interpreter"
-description: ""
-group: manual
+title: "Flink Interpreter for Apache Zeppelin"
+description: "Apache Flink is an open source platform for distributed stream and batch data processing."
+group: interpreter --- + {% include JB/setup %} # Flink interpreter for Apache Zeppelin @@ -40,7 +53,7 @@ At the "Interpreters" menu, you have to create a new Flink interpreter and provi For more information about Flink configuration, you can find it [here](https://ci.apache.org/projects/flink/flink-docs-release-1.0/setup/config.html). ## How to test it's working -In example, by using the [Zeppelin notebook](https://www.zeppelinhub.com/viewer/notebooks/aHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL05GTGFicy96ZXBwZWxpbi1ub3RlYm9va3MvbWFzdGVyL25vdGVib29rcy8yQVFFREs1UEMvbm90ZS5qc29u) is from Till Rohrmann's presentation [Interactive data analysis with Apache Flink](http://www.slideshare.net/tillrohrmann/data-analysis-49806564) for Apache Flink Meetup. +You can find an example of Flink usage in the Zeppelin Tutorial folder or try the following word count example, by using the [Zeppelin notebook](https://www.zeppelinhub.com/viewer/notebooks/aHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL05GTGFicy96ZXBwZWxpbi1ub3RlYm9va3MvbWFzdGVyL25vdGVib29rcy8yQVFFREs1UEMvbm90ZS5qc29u) from Till Rohrmann's presentation [Interactive data analysis with Apache Flink](http://www.slideshare.net/tillrohrmann/data-analysis-49806564) for Apache Flink Meetup. ``` %sh @@ -50,7 +63,7 @@ wget http://www.gutenberg.org/ebooks/10.txt.utf-8 {% highlight scala %} %flink case class WordCount(word: String, frequency: Int) -val bible:DataSet[String] = env.readTextFile("10.txt.utf-8") +val bible:DataSet[String] = benv.readTextFile("10.txt.utf-8") val partialCounts: DataSet[WordCount] = bible.flatMap{ line => """\b\w+\b""".r.findAllIn(line).map(word => WordCount(word, 1)) diff --git a/docs/interpreter/geode.md b/docs/interpreter/geode.md index 84a026efff5..f833f9d8911 100644 --- a/docs/interpreter/geode.md +++ b/docs/interpreter/geode.md @@ -1,9 +1,22 @@ --- layout: page -title: "Geode OQL Interpreter" -description: "" -group: manual +title: "Geode/Gemfire OQL Interpreter for Apache Zeppelin" +description: "Apache Geode (incubating) provides a database-like consistency model, reliable transaction processing and a shared-nothing architecture to maintain very low latency performance with high concurrency processing." +group: interpreter --- + {% include JB/setup %} # Geode/Gemfire OQL Interpreter for Apache Zeppelin diff --git a/docs/interpreter/groovy.md b/docs/interpreter/groovy.md new file mode 100644 index 00000000000..f64cbded242 --- /dev/null +++ b/docs/interpreter/groovy.md @@ -0,0 +1,138 @@ +--- +layout: page +title: "Apache Groovy Interpreter for Apache Zeppelin" +description: "Apache Groovy is a powerful, optionally typed and dynamic language, with static-typing and static compilation capabilities, for the Java platform aimed at improving developer productivity thanks to a concise, familiar and easy to learn syntax." 
+group: interpreter
+---
+
+{% include JB/setup %}
+
+# Groovy Interpreter for Apache Zeppelin

### Samples

```groovy
%groovy
//get a parameter defined as z.angularBind('ngSearchParam', value, 'paragraph_id')
//g is a context object for groovy, to avoid mixing with the z object
def param = g.angular('ngSearchParam')
//send request https://www.googleapis.com/customsearch/v1?q=ngSearchParam_value
def r = HTTP.get(
  //assume you defined the groovy interpreter property
  // `search_baseurl`='https://www.googleapis.com/customsearch/v1'
  //in a groovy object o.getProperty('A') == o.'A' == o.A == o['A']
  url : g.search_baseurl,
  query: [ q: param ],
  headers: [
    'Accept':'application/json',
    //'Authorization:' : g.getProperty('search_auth'),
  ],
  ssl : g.getProperty('search_ssl') // assume groovy interpreter property search_ssl = HTTP.getNaiveSSLContext()
)
//check response code
if( r.response.code==200 ) {
  g.html().with{
    //g.html() renders %angular to output and returns groovy.xml.MarkupBuilder
    h2("the response ${r.response.code}")
    span( r.response.body )
    h2("headers")
    pre( r.response.headers.join('\n') )
  }
} else {
  //just to show that it's possible to use println with a multiline groovy string to render output
  println("""%angular

  """)
}
```

```groovy
%groovy

//renders a table with headers a, b, c and two rows
g.table(
  [
    ['a','b','c'],
    ['a1','b1','c1'],
    ['a2','b2','c2'],
  ]
)
```

### the `g` object

* `g.angular(String name)`

  Returns the angular object by name. Looks up the notebook scope first and then the global scope.

* `g.angularBind(String name, Object value)`

  Assigns a new `value` to the angular object `name`.

* `java.util.Properties g.getProperties()`

  Returns all properties defined for this interpreter.

* `String g.getProperty('PROPERTY_NAME')`

  ```groovy
  g.PROPERTY_NAME
  g.'PROPERTY_NAME'
  g['PROPERTY_NAME']
  g.getProperties().getProperty('PROPERTY_NAME')
  ```

  All of the above access the named property defined in the groovy interpreter, in this case the one named `PROPERTY_NAME`.

* `groovy.xml.MarkupBuilder g.html()`

  Starts or continues rendering of `%angular` to the output and returns a [groovy.xml.MarkupBuilder](http://groovy-lang.org/processing-xml.html#_markupbuilder).
  MarkupBuilder is useful to generate html (xml).

* `void g.table(obj)`

  Starts or continues rendering table rows.

  `obj`: a List of rows, each a List of columns, where the first row is the header.

* `g.input(name, value)`

  Creates a `text` input with the value specified. The parameter `value` is optional.

* `g.select(name, default, Map options)`

  Creates a `select` input with the defined options. The parameter `default` is optional.

  ```g.select('sex', 'm', ['m':'man', 'w':'woman'])```

* `g.checkbox(name, Collection checked, Map options)`

  Creates a `checkbox` input.

* `g.get(name, default)`

  Returns an interpreter-bound variable. Visibility depends on the interpreter scope. The parameter `default` is optional.

* `g.put(name, value)`

  Stores a new value into an interpreter-bound variable. Visibility depends on the interpreter scope.

diff --git a/docs/interpreter/hbase.md b/docs/interpreter/hbase.md index 1aeb77bcade..12e05174359 100644 --- a/docs/interpreter/hbase.md +++ b/docs/interpreter/hbase.md @@ -1,9 +1,22 @@
 ---
 layout: page
-title: "HBase Shell Interpreter"
-description: ""
-group: manual
+title: "HBase Shell Interpreter for Apache Zeppelin"
+description: "HBase Shell is a JRuby IRB client for Apache HBase.
This interpreter provides all capabilities of Apache HBase shell within Apache Zeppelin." +group: interpreter --- + {% include JB/setup %} # HBase Shell Interpreter for Apache Zeppelin diff --git a/docs/interpreter/hdfs.md b/docs/interpreter/hdfs.md index 7cde31a6960..d7b7bf885d8 100644 --- a/docs/interpreter/hdfs.md +++ b/docs/interpreter/hdfs.md @@ -1,9 +1,22 @@ --- layout: page -title: "HDFS File System Interpreter" -description: "" -group: manual +title: "HDFS File System Interpreter for Apache Zeppelin" +description: "Hadoop File System is a distributed, fault tolerant file system part of the hadoop project and is often used as storage for distributed processing engines like Hadoop MapReduce and Apache Spark or underlying file systems like Alluxio." +group: interpreter --- + {% include JB/setup %} # HDFS File System Interpreter for Apache Zeppelin diff --git a/docs/interpreter/hive.md b/docs/interpreter/hive.md index a1fc4e1e618..ba6614b41e9 100644 --- a/docs/interpreter/hive.md +++ b/docs/interpreter/hive.md @@ -1,9 +1,22 @@ --- layout: page -title: "Hive Interpreter" -description: "" -group: manual +title: "Hive Interpreter for Apache Zeppelin" +description: "Apache Hive data warehouse software facilitates querying and managing large datasets residing in distributed storage. Hive provides a mechanism to project structure onto this data and query the data using a SQL-like language called HiveQL. At the same time this language also allows traditional map/reduce programmers to plug in their custom mappers and reducers when it is inconvenient or inefficient to express this logic in HiveQL." +group: interpreter --- + {% include JB/setup %} # Hive Interpreter for Apache Zeppelin @@ -138,7 +151,7 @@ select * from my_table; You can also run multiple queries up to 10 by default. Changing these settings is not implemented yet. ### Apply Zeppelin Dynamic Forms -You can leverage [Zeppelin Dynamic Form]({{BASE_PATH}}/manual/dynamicform.html) inside your queries. You can use both the `text input` and `select form` parameterization features. +You can leverage [Zeppelin Dynamic Form](../manual/dynamicform.html) inside your queries. You can use both the `text input` and `select form` parameterization features. ```sql %hive diff --git a/docs/interpreter/ignite.md b/docs/interpreter/ignite.md index 8a25fd7ca79..677952f1cf1 100644 --- a/docs/interpreter/ignite.md +++ b/docs/interpreter/ignite.md @@ -1,9 +1,22 @@ --- layout: page -title: "Ignite Interpreter" -description: "Ignite user guide" -group: manual +title: "Ignite Interpreter for Apache Zeppelin" +description: "Apache Ignite in-memory Data Fabric is a high-performance, integrated and distributed in-memory platform for computing and transacting on large-scale data sets in real-time, orders of magnitude faster than possible with traditional disk-based or flash technologies." +group: interpreter --- + {% include JB/setup %} # Ignite Interpreter for Apache Zeppelin @@ -20,8 +33,8 @@ You can use Zeppelin to retrieve distributed data from cache using Ignite SQL in ## Installing and Running Ignite example In order to use Ignite interpreters, you may install Apache Ignite in some simple steps: -1. Download Ignite [source release](https://ignite.apache.org/download.html#sources) or [binary release](https://ignite.apache.org/download.html#binaries) whatever you want. But you must download Ignite as the same version of Zeppelin's. If it is not, you can't use scala code on Zeppelin. 
You can find ignite version in Zeppelin at the pom.xml which is placed under `path/to/your-Zeppelin/ignite/pom.xml` ( Of course, in Zeppelin source release ). Please check `ignite.version` .
Currently, Zeppelin provides ignite only in Zeppelin source release. So, if you download Zeppelin binary release( `zeppelin-0.5.0-incubating-bin-spark-xxx-hadoop-xx` ), you can not use ignite interpreter on Zeppelin. We are planning to include ignite in a future binary release. -2. Examples are shipped as a separate Maven project, so to start running you simply need to import provided /apache-ignite-fabric-1.2.0-incubating-bin/pom.xml file into your favourite IDE, such as Eclipse. +1. Ignite provides examples only with source or binary release. Download Ignite [source release](https://ignite.apache.org/download.html#sources) or [binary release](https://ignite.apache.org/download.html#binaries) whatever you want. But you must download Ignite as the same version of Zeppelin's. If it is not, you can't use scala code on Zeppelin. The supported Ignite version is specified in [Supported Interpreter table](https://zeppelin.apache.org/supported_interpreters.html#ignite) for each Zeppelin release. If you're using Zeppelin master branch, please see `ignite.version` in `path/to/your-Zeppelin/ignite/pom.xml`. +2. Examples are shipped as a separate Maven project, so to start running you simply need to import provided `/apache-ignite-fabric-{version}-bin/examples/pom.xml` file into your favourite IDE, such as Eclipse. * In case of Eclipse, Eclipse -> File -> Import -> Existing Maven Projects * Set examples directory path to Eclipse and select the pom.xml. @@ -45,12 +58,12 @@ At the "Interpreters" menu, you may edit Ignite interpreter or create new one. Z ignite.addresses 127.0.0.1:47500..47509 - Coma separated list of Ignite cluster hosts. See [Ignite Cluster Configuration](https://apacheignite.readme.io/v1.2/docs/cluster-config) section for more details. + Coma separated list of Ignite cluster hosts. See [Ignite Cluster Configuration](https://apacheignite.readme.io/docs/cluster-config) section for more details. ignite.clientMode true - You can connect to the Ignite cluster as client or server node. See [Ignite Clients vs. Servers](https://apacheignite.readme.io/v1.2/docs/clients-vs-servers) section for details. Use true or false values in order to connect in client or server mode respectively. + You can connect to the Ignite cluster as client or server node. See [Ignite Clients vs. Servers](https://apacheignite.readme.io/docs/clients-vs-servers) section for details. Use true or false values in order to connect in client or server mode respectively. ignite.config.url @@ -65,7 +78,7 @@ At the "Interpreters" menu, you may edit Ignite interpreter or create new one. Z ignite.peerClassLoadingEnabled true - Enables peer-class-loading. See [Zero Deployment](https://apacheignite.readme.io/v1.2/docs/zero-deployment) section for details. Use true or false values in order to enable or disable P2P class loading respectively. + Enables peer-class-loading. See [Zero Deployment](https://apacheignite.readme.io/docs/zero-deployment) section for details. Use true or false values in order to enable or disable P2P class loading respectively. @@ -76,7 +89,7 @@ After configuring Ignite interpreter, create your own notebook. Then you can bin ![Binding Interpreters](../assets/themes/zeppelin/img/docs-img/ignite-interpreter-binding.png) -For more interpreter binding information see [here](http://zeppelin.apache.org/docs/manual/interpreters.html). +For more interpreter binding information see [here](../manual/interpreters.html#what-is-interpreter-setting). 
### Ignite SQL interpreter
In order to execute an SQL query, use the `%ignite.ignitesql` prefix.
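For example, a paragraph like the following (the `person` table is a hypothetical SQL-enabled cache) runs a distributed query across the cluster:

```sql
%ignite.ignitesql
select name, count(*) from person group by name
```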
diff --git a/docs/interpreter/jdbc.md b/docs/interpreter/jdbc.md index f977dd1bcbb..b7ac45ae442 100644 --- a/docs/interpreter/jdbc.md +++ b/docs/interpreter/jdbc.md @@ -1,75 +1,190 @@ --- layout: page -title: "Generic JDBC Interpreter" -description: "JDBC user guide" -group: manual +title: "Generic JDBC Interpreter for Apache Zeppelin" +description: "Generic JDBC Interpreter lets you create a JDBC connection to any data source. You can use Postgres, MySql, MariaDB, Redshift, Apache Hive, Apache Phoenix, Apache Drill and Apache Tajo using JDBC interpreter." +group: interpreter --- + {% include JB/setup %} - -# Generic JDBC Interpreter for Apache Zeppelin +# Generic JDBC Interpreter for Apache Zeppelin
## Overview
-This interpreter lets you create a JDBC connection to any data source, by now it has been tested with:
+JDBC interpreter lets you create a JDBC connection to any data source seamlessly.
+
+Inserts, Updates, and Upserts are applied immediately after running each statement.
+
+By now, it has been tested with:
-* Postgres
-* MySql
-* MariaDB
-* Redshift
-* Apache Hive
-* Apache Phoenix
-* Apache Drill (Details on using [Drill JDBC Driver](https://drill.apache.org/docs/using-the-jdbc-driver))
-* Apache Tajo
+* Postgresql - JDBC Driver
+* Mysql - JDBC Driver
+* MariaDB - JDBC Driver
+* Redshift - JDBC Driver
+* Apache Hive - JDBC Driver
+* Apache Phoenix itself is a JDBC driver
+* Apache Drill - JDBC Driver
+* Apache Tajo - JDBC Driver
-If someone else used another database please report how it works to improve functionality.
+If you are using other databases not in the above list, please feel free to share your use case. It would be helpful for improving the functionality of the JDBC interpreter.

-## Create Interpreter
-When you create a interpreter by default use PostgreSQL with the next properties:
+## Create a new JDBC Interpreter
+
+First, click the `+ Create` button at the top-right corner of the interpreter setting page.
+
+Fill the `Interpreter name` field with whatever you want to use as the alias (e.g. mysql, mysql2, hive, redshift, etc.). Please note that this alias will be used as `%interpreter_name` to call the interpreter in a paragraph.
+Then select `jdbc` as the `Interpreter group`.
+
+The default driver of the JDBC interpreter is set to `PostgreSQL`, meaning Zeppelin includes the `PostgreSQL` driver jar in itself.
+So you don't need to add any dependencies (e.g. the artifact name or path for the `PostgreSQL` driver jar) for a `PostgreSQL` connection.
+The JDBC interpreter properties are defined by default like below.
| Name | Default Value | Description |
| ----- | ----- | ----- |
| common.max_count | 1000 | The maximum number of SQL results to display |
| default.driver | org.postgresql.Driver | JDBC driver name |
| default.password | ******** | The JDBC user password |
| default.url | jdbc:postgresql://localhost:5432/ | The URL for JDBC |
| default.user | gpadmin | The JDBC user name |
| default.precode | | Some SQL which executes every time after initialization of the interpreter (see [Binding mode](../manual/interpreters.md#interpreter-binding-mode)) |
| default.completer.schemaFilters | | Comma separated schema (schema = catalog = database) filters to get metadata for completions. The '%' symbol matches any sequence of characters (e.g. prod_v_%,public%,info) |
-It is not necessary to add driver jar to the classpath for PostgreSQL as it is included in Zeppelin.
+If you want to connect to other databases such as `Mysql`, `Redshift` and `Hive`, you need to edit the property values.
+You can also use [Credential](../security/datasource_authorization.html) for JDBC authentication.
+If the `default.user` and `default.password` properties are deleted (using the X button) for a database connection in the interpreter setting page,
+the JDBC interpreter will get the account information from [Credential](../security/datasource_authorization.html).
+
+The below example is for a `Mysql` connection.
-### Simple connection
-Prior to creating the interpreter it is necessary to add maven coordinate or path of the JDBC driver to the Zeppelin classpath. To do this you must edit dependencies artifact (ex. `mysql:mysql-connector-java:5.1.38`) in interpreter menu as shown:
+The last step is **Dependency Setting**. Since Zeppelin only includes the `PostgreSQL` driver jar by default, you need to add each driver's maven coordinates or JDBC driver's jar file path for the other databases.
+
+That's it. You can find more JDBC connection setting examples ([Mysql](#mysql), [MariaDB](#mariadb), [Redshift](#redshift), [Apache Hive](#apache-hive), [Apache Phoenix](#apache-phoenix), and [Apache Tajo](#apache-tajo)) in [this section](#examples).

+## More properties
+There are more JDBC interpreter properties you can specify, like below.
| Property Name | Description |
| ----- | ----- |
| common.max_result | Max number of SQL results to display, to prevent browser overload. This is a common property for all connections |
| zeppelin.jdbc.auth.type | Supported authentication types are SIMPLE and KERBEROS |
| zeppelin.jdbc.principal | The principal name to load from the keytab |
| zeppelin.jdbc.keytab.location | The path to the keytab file |
| zeppelin.jdbc.auth.kerberos.proxy.enable | When auth type is Kerberos, enable/disable the Kerberos proxy with the login user to get the connection. Default value is true. |
| default.jceks.file | jceks store path (e.g. jceks://file/tmp/zeppelin.jceks) |
| default.jceks.credentialKey | jceks credential key |
-To create the interpreter you need to specify connection parameters as shown in the table.
+You can also add more properties by using this [method](http://docs.oracle.com/javase/7/docs/api/java/sql/DriverManager.html#getConnection%28java.lang.String,%20java.util.Properties%29).
+For example, if a connection needs a schema parameter, you would have to add the property as follows:
@@ -77,306 +192,523 @@ To create the interpreter you need to specify connection parameters as shown in
| Name | Value |
| ----- | ----- |
| default.schema | schema_name |
## Binding JDBC interpreter to notebook
To bind the interpreters created in the interpreter setting page, click the gear icon at the top-right corner.

Select (blue) or deselect (white) the interpreter buttons depending on your use cases.
If you need to use more than one interpreter in the notebook, activate several buttons.
Don't forget to click the `Save` button, or you will face an `Interpreter *** is not found` error.

## How to use
### Run the paragraph with JDBC interpreter
To test whether your database and Zeppelin are successfully connected or not, type `%jdbc_interpreter_name` (e.g. `%mysql`) at the top of the paragraph and run `show databases`.

```sql
%jdbc_interpreter_name
show databases
```
If the paragraph is `FINISHED` without any errors, a new paragraph will be automatically added after the previous one with `%jdbc_interpreter_name`.
So you don't need to type this prefix in every paragraph header.

### Apply Zeppelin Dynamic Forms

You can leverage [Zeppelin Dynamic Form](../manual/dynamicform.html) inside your queries. You can use both the `text input` and `select form` parameterization features.

```sql
%jdbc_interpreter_name
SELECT name, country, performer
FROM demo.performers
WHERE name='{{"{{performer=Sheryl Crow|Doof|Fanfarlo|Los Paranoia"}}}}'
```

### Usage of *precode*
You can set a *precode* for each data source. The code runs once while opening the connection.

##### Properties
Example settings of an interpreter with two data sources, each of which has its own *precode* parameter.
| Property Name | Value |
| ----- | ----- |
| default.driver | org.postgresql.Driver |
| default.password | 1 |
| default.url | jdbc:postgresql://localhost:5432/ |
| default.user | postgres |
| default.precode | set search_path='test_path' |
| mysql.driver | com.mysql.jdbc.Driver |
| mysql.password | 1 |
| mysql.url | jdbc:mysql://localhost:3306/ |
| mysql.user | root |
| mysql.precode | set @v=12 |
-### Multiple connections
##### Usage
Tests of the *precode* execution for each data source.

```sql
%jdbc
show search_path
```
Returns the value of `search_path`, which is set in *default.precode*.
-JDBC interpreter also allows connections to multiple data sources. It is necessary to set a prefix for each connection to reference it in the paragraph in the form of `%jdbc(prefix)`. Before you create the interpreter it is necessary to add each driver's maven coordinates or JDBC driver's jar file path to the Zeppelin classpath. To do this you must edit the dependencies of JDBC interpreter in interpreter menu as following:
```sql
%jdbc(mysql)
select @v
```
Returns the value of `v`, which is set in *mysql.precode*.

## Examples
Here are some examples you can refer to. Beyond the connectors below, you can connect to any database as long as it can be configured with its JDBC driver.

-You can add all the jars you need to make multiple connections into the same JDBC interpreter. To create the interpreter you must specify the parameters. For example we will create two connections to MySQL and Redshift, the respective prefixes are `default` and `redshift`:

### Postgres

##### Properties
| Name | Value |
| ----- | ----- |
| default.driver | org.postgresql.Driver |
| default.url | jdbc:postgresql://localhost:5432/ |
| default.user | postgres_user |
| default.password | postgres_password |
[Postgres JDBC Driver Docs](https://jdbc.postgresql.org/documentation/documentation.html)

##### Dependencies
| Artifact | Excludes |
| ----- | ----- |
| org.postgresql:postgresql:9.4.1211 | |
[Maven Repository: org.postgresql:postgresql](https://mvnrepository.com/artifact/org.postgresql/postgresql)
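To verify the connection, a trivial read-only paragraph can be run against the default (PostgreSQL) connection first; a minimal sketch:

```sql
%jdbc
select version()
```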
### Mysql

##### Properties

| Name | Value |
| ----- | ----- |
| default.driver | com.mysql.jdbc.Driver |
| default.url | jdbc:mysql://localhost:3306/ |
| default.user | mysql_user |
| default.password | mysql_password |
[Mysql JDBC Driver Docs](https://dev.mysql.com/downloads/connector/j/)

##### Dependencies
| Artifact | Excludes |
| ----- | ----- |
| mysql:mysql-connector-java:5.1.38 | |
[Maven Repository: mysql:mysql-connector-java](https://mvnrepository.com/artifact/mysql/mysql-connector-java)

### MariaDB

##### Properties
| Name | Value |
| ----- | ----- |
| default.driver | org.mariadb.jdbc.Driver |
| default.url | jdbc:mariadb://localhost:3306 |
| default.user | mariadb_user |
| default.password | mariadb_password |
[MariaDB JDBC Driver Docs](https://mariadb.com/kb/en/mariadb/about-mariadb-connector-j/)

##### Dependencies
| Artifact | Excludes |
| ----- | ----- |
| org.mariadb.jdbc:mariadb-java-client:1.5.4 | |
[Maven Repository: org.mariadb.jdbc:mariadb-java-client](https://mvnrepository.com/artifact/org.mariadb.jdbc/mariadb-java-client)

### Redshift

##### Properties
| Name | Value |
| ----- | ----- |
| default.driver | com.amazon.redshift.jdbc42.Driver |
| default.url | jdbc:redshift://your-redshift-instance-address.redshift.amazonaws.com:5439/your-database |
| default.user | redshift_user |
| default.password | redshift_password |
[AWS Redshift JDBC Driver Docs](http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html)

##### Dependencies
| Artifact | Excludes |
| ----- | ----- |
| com.amazonaws:aws-java-sdk-redshift:1.11.51 | |
[Maven Repository: com.amazonaws:aws-java-sdk-redshift](https://mvnrepository.com/artifact/com.amazonaws/aws-java-sdk-redshift)

### Apache Hive

##### Properties
| Name | Value |
| ----- | ----- |
| default.driver | org.apache.hive.jdbc.HiveDriver |
| default.url | jdbc:hive2://localhost:10000 |
| default.user | hive_user |
| default.password | hive_password |
| default.proxy.user.property | Example value: hive.server2.proxy.user |
[Apache Hive 1 JDBC Driver Docs](https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients#HiveServer2Clients-JDBC)
[Apache Hive 2 JDBC Driver Docs](https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients#HiveServer2Clients-JDBC)

##### Dependencies
| Artifact | Excludes |
| ----- | ----- |
| org.apache.hive:hive-jdbc:0.14.0 | |
| org.apache.hadoop:hadoop-common:2.6.0 | |
[Maven Repository: org.apache.hive:hive-jdbc](https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc)

##### Impersonation
When the Zeppelin server is running with authentication enabled, the interpreter can utilize Hive's user proxy feature, i.e. send an extra parameter for creating and running a session ("hive.server2.proxy.user=": "${loggedInUser}"). This is particularly useful when multiple users are sharing a notebook.

To enable this, set the following:
-## Bind to Notebook
-In the `Notebook` click on the `settings` icon at the top-right corner. Use select/deselect to specify the interpreters to be used in the `Notebook`.
-## More Properties
-You can modify the interpreter configuration in the `Interpreter` section. The most common properties are as follows, but you can specify other properties that need to be connected.
-{prefix}.url: JDBC URL to connect; the URL must include the name of the database
-{prefix}.user: JDBC user name
-{prefix}.password: JDBC password
-{prefix}.driver: JDBC driver name
-common.max_result: Max number of SQL results to display, to prevent browser overload (common property for all connections)
-To develop this functionality use this [method](http://docs.oracle.com/javase/7/docs/api/java/sql/DriverManager.html#getConnection%28java.lang.String,%20java.util.Properties%29). For example if a connection needs a schema parameter, it would have to add the property as follows:

+ - `zeppelin.jdbc.auth.type` as `SIMPLE` or `KERBEROS` (if required) in the interpreter setting.
+ - `${prefix}.proxy.user.property` as `hive.server2.proxy.user`

##### Sample configuration
| Name | Value |
| ----- | ----- |
| hive.driver | org.apache.hive.jdbc.HiveDriver |
| hive.password | |
| hive.url | jdbc:hive2://hive-server-host:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2 |
| hive.proxy.user.property | hive.server2.proxy.user |
| zeppelin.jdbc.auth.type | SIMPLE |
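With a configuration like the above, a paragraph such as the following (a sketch; the `%hive` alias is whatever you named the interpreter, and `current_user()` requires Hive 1.2 or later) should return the logged-in Zeppelin user rather than the server principal:

```sql
%hive
select current_user()
```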
    -## Examples -### Hive - -#### Properties - - - - - - - - - - - - - - - - - - - - - -
-hive.driver org.apache.hive.jdbc.HiveDriver; hive.url jdbc:hive2://localhost:10000; hive.user hive_user; hive.password hive_password
-#### Dependencies: org.apache.hive:hive-jdbc:0.14.0; org.apache.hadoop:hadoop-common:2.6.0
-### Phoenix
-Phoenix supports `thick` and `thin` connection types: the thick client is faster, but must connect directly to ZooKeeper and HBase RegionServers; the thin client has fewer dependencies and connects through a [Phoenix Query Server](http://phoenix.apache.org/server.html) instance. Use the appropriate `phoenix.driver` and `phoenix.url` for your connection type.
-#### Properties: phoenix.driver org.apache.phoenix.jdbc.PhoenixDriver ('Thick Client', connects directly to Phoenix) or org.apache.phoenix.queryserver.client.Driver ('Thin Client', connects via Phoenix Query Server); phoenix.url jdbc:phoenix:localhost:2181:/hbase-unsecure (thick) or jdbc:phoenix:thin:url=http://localhost:8765;serialization=PROTOBUF (thin); phoenix.user phoenix_user; phoenix.password phoenix_password
-#### Dependencies (include only one of the following): org.apache.phoenix:phoenix-core:4.4.0-HBase-1.0 (thick); org.apache.phoenix:phoenix-server-client:4.7.0-HBase-1.1 (thin, Phoenix 4.7); org.apache.phoenix:phoenix-queryserver-client:4.8.0-HBase-1.2 (thin, Phoenix 4.8+)
-### Tajo
-#### Properties: tajo.driver org.apache.tajo.jdbc.TajoDriver; tajo.url jdbc:tajo://localhost:26002/default
-#### Dependencies: org.apache.tajo:tajo-jdbc:0.11.0
-## How to use
-### Reference in paragraph
-Start the paragraphs with `%jdbc`; this will use the `default` prefix for the connection. If you want to use another connection you should specify its prefix as follows: `%jdbc(prefix)`:
-```sql
-%jdbc
-SELECT * FROM db_name;
-```
-or
-```sql
-%jdbc(prefix)
-SELECT * FROM db_name;
-```
-### Apply Zeppelin Dynamic Forms
-You can leverage [Zeppelin Dynamic Form](../manual/dynamicform.html) inside your queries. You can use both the `text input` and `select form` parametrization features.
-```sql
-%jdbc(prefix)
-SELECT name, country, performer
-FROM demo.performers
-WHERE name='{{performer=Sheryl Crow|Doof|Fanfarlo|Los Paranoia}}'
-```

### Apache Phoenix

Phoenix supports `thick` and `thin` connection types:

- [Thick client](#thick-client-connection) is faster, but must connect directly to ZooKeeper and HBase RegionServers.
- [Thin client](#thin-client-connection) has fewer dependencies and connects through a [Phoenix Query Server](http://phoenix.apache.org/server.html) instance.

Use the appropriate `default.driver`, `default.url`, and the dependency artifact for your connection type.

#### Thick client connection

##### Properties
| Name | Value |
| ----- | ----- |
| default.driver | org.apache.phoenix.jdbc.PhoenixDriver |
| default.url | jdbc:phoenix:localhost:2181:/hbase-unsecure |
| default.user | phoenix_user |
| default.password | phoenix_password |
    -``` +##### Dependencies + + + + + + + + + +
| Artifact | Excludes |
| ----- | ----- |
| org.apache.phoenix:phoenix-core:4.4.0-HBase-1.0 | |
    -### Apply Zeppelin Dynamic Forms +[Maven Repository: org.apache.phoenix:phoenix-core](https://mvnrepository.com/artifact/org.apache.phoenix/phoenix-core) -You can leverage [Zeppelin Dynamic Form](../manual/dynamicform.html) inside your queries. You can use both the `text input` and `select form` parametrization features +#### Thin client connection -```sql -%jdbc(prefix) -SELECT name, country, performer -FROM demo.performers -WHERE name='{{performer=Sheryl Crow|Doof|Fanfarlo|Los Paranoia}}' -``` + + +##### Properties + + + + + + + + + + + + + + + + + + + + + +
| Name | Value |
| ----- | ----- |
| default.driver | org.apache.phoenix.queryserver.client.Driver |
| default.url | jdbc:phoenix:thin:url=http://localhost:8765;serialization=PROTOBUF |
| default.user | phoenix_user |
| default.password | phoenix_password |
##### Dependencies

Before adding one of the below dependencies, check your Phoenix version first.
| Artifact | Excludes | Description |
| ----- | ----- | ----- |
| org.apache.phoenix:phoenix-server-client:4.7.0-HBase-1.1 | | For Phoenix 4.7 |
| org.apache.phoenix:phoenix-queryserver-client:4.8.0-HBase-1.2 | | For Phoenix 4.8+ |
[Maven Repository: org.apache.phoenix:phoenix-queryserver-client](https://mvnrepository.com/artifact/org.apache.phoenix/phoenix-queryserver-client)

### Apache Tajo

##### Properties
| Name | Value |
| ----- | ----- |
| default.driver | org.apache.tajo.jdbc.TajoDriver |
| default.url | jdbc:tajo://localhost:26002/default |
[Apache Tajo JDBC Driver Docs](https://tajo.apache.org/docs/current/jdbc_driver.html)

##### Dependencies
| Artifact | Excludes |
| ----- | ----- |
| org.apache.tajo:tajo-jdbc:0.11.0 | |
[Maven Repository: org.apache.tajo:tajo-jdbc](https://mvnrepository.com/artifact/org.apache.tajo/tajo-jdbc)

-## Bugs & Reporting
-If you find a bug for this interpreter, please create a [JIRA](https://issues.apache.org/jira/browse/ZEPPELIN-382?jql=project%20%3D%20ZEPPELIN) ticket.
+## Bug reporting
+If you find a bug while using the JDBC interpreter, please create a [JIRA](https://issues.apache.org/jira/browse/ZEPPELIN) ticket.
diff --git a/docs/interpreter/kylin.md b/docs/interpreter/kylin.md new file mode 100644 index 00000000000..e1d27d9907b --- /dev/null +++ b/docs/interpreter/kylin.md @@ -0,0 +1,82 @@
+---
+layout: page
+title: "Apache Kylin Interpreter for Apache Zeppelin"
+description: "Apache Kylin™ is an open source Distributed Analytics Engine designed to provide SQL interface and multi-dimensional analysis (OLAP) on Hadoop supporting extremely large datasets, originally contributed by eBay Inc."
+group: interpreter
+---

{% include JB/setup %}

# Apache Kylin Interpreter for Apache Zeppelin
## Overview
[Apache Kylin](https://kylin.apache.org/) is an open source Distributed Analytics Engine designed to provide a SQL interface and multi-dimensional analysis (OLAP) on Hadoop, supporting extremely large datasets; it was originally contributed by eBay Inc. The interpreter assumes that Apache Kylin has been installed and that you can connect to Apache Kylin from the machine on which Apache Zeppelin is installed.
To get started with Apache Kylin, please see the [Apache Kylin Quickstart](https://kylin.apache.org/docs15/index.html).

## Configuration
| Name | Default | Description |
| ----- | ----- | ----- |
| kylin.api.url | http://localhost:7070/kylin/api/query | Kylin query POST API. The format can be like http://&lt;host&gt;:&lt;port&gt;/kylin/api/query |
| kylin.api.user | ADMIN | Kylin user |
| kylin.api.password | KYLIN | Kylin password |
| kylin.query.project | learn_kylin | String, project to perform the query. Can be updated at the notebook level |
| kylin.query.ispartial | true | true or false (@Deprecated since Apache Kylin V1.5). Whether to accept a partial result or not, default "false". Set to "false" for production use. |
| kylin.query.limit | 5000 | int, query limit. If a limit is set in the SQL, perPage will be ignored. |
| kylin.query.offset | 0 | int, query offset. If an offset is set in the SQL, curIndex will be ignored. |
## Using the Apache Kylin Interpreter
In a paragraph, use `%kylin(project_name)` to select the **kylin** interpreter and the **project name**, and then input the **sql**. If no project name is defined, the default project name from the above configuration will be used.

```
%kylin(learn_project)
select count(*) from kylin_sales group by part_dt
```

diff --git a/docs/interpreter/lens.md b/docs/interpreter/lens.md index b4bcda49bd7..b929220339a 100644 --- a/docs/interpreter/lens.md +++ b/docs/interpreter/lens.md @@ -1,9 +1,22 @@
 ---
 layout: page
-title: "Lens Interpreter"
-description: "Lens user guide"
-group: manual
+title: "Lens Interpreter for Apache Zeppelin"
+description: "Apache Lens provides an Unified Analytics interface. Lens aims to cut the Data Analytics silos by providing a single view of data across multiple tiered data stores and optimal execution environment for the analytical query. It seamlessly integrates Hadoop with traditional data warehouses to appear like one."
+group: interpreter
 ---
+
 {% include JB/setup %}
 # Lens Interpreter for Apache Zeppelin
diff --git a/docs/interpreter/livy.md b/docs/interpreter/livy.md index ef7c8ce6876..a7b776c0f09 100644 --- a/docs/interpreter/livy.md +++ b/docs/interpreter/livy.md @@ -1,9 +1,22 @@
 ---
 layout: page
-title: "Livy Interpreter"
-description: ""
-group: manual
+title: "Livy Interpreter for Apache Zeppelin"
+description: "Livy is an open source REST interface for interacting with Spark from anywhere. It supports executing snippets of code or programs in a Spark context that runs locally or in YARN."
+group: interpreter
 ---
+
 {% include JB/setup %}
 # Livy Interpreter for Apache Zeppelin
@@ -27,32 +40,47 @@ Additional requirements for the Livy interpreter are:
 ## Configuration
 We added some common configurations for spark, and you can set any configuration you want.
-This link contains all spark configurations: http://spark.apache.org/docs/latest/configuration.html#available-properties.
+You can find all Spark configurations [here](http://spark.apache.org/docs/latest/configuration.html#available-properties).
 And instead of starting the property with `spark.`, it should be replaced with `livy.spark.`.
-Example: `spark.master` to `livy.spark.master`
+Example: `spark.driver.memory` to `livy.spark.driver.memory`
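For instance, a couple of interpreter properties following that naming rule might look like this (the values are illustrative only):

```
livy.spark.driver.memory      4g
livy.spark.executor.instances 2
```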
+| Property | Default | Description |
+| -------- | ------- | ----------- |
+| livy.spark.master | local[*] | Spark master uri. ex) spark://masterhost:7077 |
+| zeppelin.livy.url | http://localhost:8998 | URL where the livy server is running |
+| zeppelin.livy.spark.sql.maxResult | 1000 | Max number of Spark SQL results to display |
+| zeppelin.livy.spark.sql.field.truncate | true | Whether to truncate field values longer than 20 characters or not |
+| zeppelin.livy.session.create_timeout | 120 | Timeout in seconds for session creation |
+| zeppelin.livy.displayAppInfo | false | Whether to display app info |
+| zeppelin.livy.pull_status.interval.millis | 1000 | The interval for checking paragraph execution status |
+| livy.spark.driver.cores | | Driver cores. ex) 1, 2 |
+| livy.spark.jars.packages | | Adding extra libraries to the livy interpreter |
+| zeppelin.livy.ssl.trustStore | | Client trustStore file. Used when livy ssl is enabled |
+| zeppelin.livy.ssl.trustStorePassword | | Password for trustStore file. Used when livy ssl is enabled |
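+
+As a sketch of the prefix rule described above, plain Spark properties map onto Livy interpreter properties like this (the values are illustrative, not defaults):
+
+```
+# spark.driver.memory  becomes  livy.spark.driver.memory
+livy.spark.driver.memory   4g
+# spark.executor.cores becomes  livy.spark.executor.cores
+livy.spark.executor.cores  2
+```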
+**We removed `livy.spark.master` in Zeppelin 0.7 because we suggest using Livy 0.3 with Zeppelin 0.7, and Livy 0.3 does not allow specifying `livy.spark.master`; it enforces yarn-cluster mode.**
+
+## Adding External libraries
+You can load a dynamic library into the livy interpreter by setting the `livy.spark.jars.packages` property to a comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. The format for the coordinates should be groupId:artifactId:version.
+
+Example
+| Property | Example | Description |
+| -------- | ------- | ----------- |
+| livy.spark.jars.packages | io.spray:spray-json_2.10:1.3.1 | Adding extra libraries to the livy interpreter |
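+
+Multiple artifacts can be loaded by separating the coordinates with commas; for example (the second coordinate here is only an illustration borrowed from the Spark interpreter docs in this guide):
+
+```
+livy.spark.jars.packages   io.spray:spray-json_2.10:1.3.1,com.databricks:spark-csv_2.10:1.2.0
+```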
    + ## How to use Basically, you can use @@ -138,7 +201,7 @@ When Zeppelin server is running with authentication enabled, then this interpret ## Apply Zeppelin Dynamic Forms -You can leverage [Zeppelin Dynamic Form]({{BASE_PATH}}/manual/dynamicform.html). You can use both the `text input` and `select form` parameterization features. +You can leverage [Zeppelin Dynamic Form](../manual/dynamicform.html). You can use both the `text input` and `select form` parameterization features. ``` %livy.pyspark diff --git a/docs/interpreter/mahout.md b/docs/interpreter/mahout.md new file mode 100644 index 00000000000..c3b4146f62a --- /dev/null +++ b/docs/interpreter/mahout.md @@ -0,0 +1,220 @@ +--- +layout: page +title: "Mahout Interpreter for Apache Zeppelin" +description: "Apache Mahout provides a unified API (the R-Like Scala DSL) for quickly creating machine learning algorithms on a variety of engines." +group: interpreter +--- + +{% include JB/setup %} + +# Apache Mahout Interpreter for Apache Zeppelin + +
+
+## Installation
+
+Apache Mahout is a collection of packages that enable machine learning and matrix algebra on underlying engines such as Apache Flink or Apache Spark. A convenience script for creating and configuring two Mahout-enabled interpreters exists. The `%sparkMahout` and `%flinkMahout` interpreters do not exist by default but can be easily created using this script.
+
+### Easy Installation
+To quickly and easily get up and running using Apache Mahout, run the following command from the top-level directory of the Zeppelin install:
+```bash
+python scripts/mahout/add_mahout.py
+```
+
+This will create the `%sparkMahout` and `%flinkMahout` interpreters, and restart Zeppelin.
+
+### Advanced Installation
+
+The `add_mahout.py` script contains several command line arguments for advanced users.
+| Argument | Description | Example |
+| -------- | ----------- | ------- |
+| --zeppelin_home | The path to the Zeppelin installation. This flag is not needed if the script is run from the top-level installation directory or from the `zeppelin/scripts/mahout` directory. | /path/to/zeppelin |
+| --mahout_home | If the user has already installed Mahout, this flag can set the path to `MAHOUT_HOME`. If this is set, downloading Mahout will be skipped. | /path/to/mahout_home |
+| --restart_later | Restarting is necessary for updates to take effect. By default the script will restart Zeppelin for you; the restart will be skipped if this flag is set. | NA |
+| --force_download | This flag will force the script to re-download the binary even if it already exists. This is useful for previously failed downloads. | NA |
+| --overwrite_existing | This flag will force the script to overwrite existing `%sparkMahout` and `%flinkMahout` interpreters. Useful when you want to just start over. | NA |
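+
+For example, an advanced invocation that reuses an existing Mahout installation (the path is hypothetical) and defers the restart might look like:
+
+```bash
+python scripts/mahout/add_mahout.py --mahout_home /path/to/mahout_home --restart_later
+```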
+
+__NOTE 1:__ Apache Mahout at this time only supports Spark 1.5 and Spark 1.6 and Scala 2.10. If the user is using another version of Spark (e.g. 2.0), the `%sparkMahout` interpreter will likely not work. The `%flinkMahout` interpreter will still work, and the user is encouraged to develop with that engine, as the code can be ported via copy and paste, as is evidenced by the tutorial notebook.
+
+__NOTE 2:__ If using Apache Flink in cluster mode, the following libraries will also need to be copied to `${FLINK_HOME}/lib`
+- mahout-math-0.12.2.jar
+- mahout-math-scala_2.10-0.12.2.jar
+- mahout-flink_2.10-0.12.2.jar
+- mahout-hdfs-0.12.2.jar
+- [com.google.guava:guava:14.0.1](http://central.maven.org/maven2/com/google/guava/guava/14.0.1/guava-14.0.1.jar)
+
+## Overview
+
+The [Apache Mahout](http://mahout.apache.org/)™ project's goal is to build an environment for quickly creating scalable, performant machine learning applications.
+
+Apache Mahout software provides three major features:
+
+- A simple and extensible programming environment and framework for building scalable algorithms
+- A wide variety of premade algorithms for Scala + Apache Spark, H2O, Apache Flink
+- Samsara, a vector math experimentation environment with R-like syntax which works at scale
+
+In other words:
+
+*Apache Mahout provides a unified API for quickly creating machine learning algorithms on a variety of engines.*
+
+## How to use
+
+When starting a session with Apache Mahout, depending on which engine you are using (Spark or Flink), a few imports must be made and a _Distributed Context_ must be declared. Copy and paste the following code and run it once to get started.
+
+### Flink
+
+```scala
+%flinkMahout
+
+import org.apache.flink.api.scala._
+import org.apache.mahout.math.drm._
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.flinkbindings._
+import org.apache.mahout.math._
+import scalabindings._
+import RLikeOps._
+
+implicit val ctx = new FlinkDistributedContext(benv)
+```
+
+### Spark
+```scala
+%sparkMahout
+
+import org.apache.mahout.math._
+import org.apache.mahout.math.scalabindings._
+import org.apache.mahout.math.drm._
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import org.apache.mahout.math.drm.RLikeDrmOps._
+import org.apache.mahout.sparkbindings._
+
+implicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext = sc2sdc(sc)
+```
+
+### Same Code, Different Engines
+
+After importing and setting up the distributed context, the Mahout R-Like DSL is consistent across engines. The following code will run in both `%flinkMahout` and `%sparkMahout`
+
+```scala
+val drmData = drmParallelize(dense(
+  (2, 2, 10.5, 10, 29.509541),  // Apple Cinnamon Cheerios
+  (1, 2, 12,   12, 18.042851),  // Cap'n'Crunch
+  (1, 1, 12,   13, 22.736446),  // Cocoa Puffs
+  (2, 1, 11,   13, 32.207582),  // Froot Loops
+  (1, 2, 12,   11, 21.871292),  // Honey Graham Ohs
+  (2, 1, 16,   8,  36.187559),  // Wheaties Honey Gold
+  (6, 2, 17,   1,  50.764999),  // Cheerios
+  (3, 2, 13,   7,  40.400208),  // Clusters
+  (3, 3, 13,   4,  45.811716)), numPartitions = 2)
+
+drmData.collect(::, 0 until 4)
+
+val drmX = drmData(::, 0 until 4)
+val y = drmData.collect(::, 4)
+val drmXtX = drmX.t %*% drmX
+val drmXty = drmX.t %*% y
+
+val XtX = drmXtX.collect
+val Xty = drmXty.collect(::, 0)
+val beta = solve(XtX, Xty)
+```
+
+## Leveraging Resource Pools and R for Visualization
+
+Resource Pools are a powerful Zeppelin feature that lets us share information between interpreters.
+A fun trick is to take the output of our work in Mahout and analyze it in other languages.
+
+### Setting up a Resource Pool in Flink
+
+In Spark based interpreters, resource pools are accessed via the ZeppelinContext API. Putting and getting objects from the resource pool is simple:
+```scala
+val myVal = 1
+z.put("foo", myVal)
+val myFetchedVal = z.get("foo")
+```
+
+To add this functionality to a Flink based interpreter we declare the following
+
+```scala
+%flinkMahout
+
+import org.apache.zeppelin.interpreter.InterpreterContext
+
+val z = InterpreterContext.get().getResourcePool()
+```
+
+Now we can access the resource pool in a consistent manner from the `%flinkMahout` interpreter.
+
+### Passing a variable from Mahout to R and Plotting
+
+In this simple example, we use Mahout (on Flink or Spark, the code is the same) to create a random matrix and then take the sin of each element. We then randomly sample the matrix and create a tab separated string. Finally we pass that string to R, where it is read as a .tsv file, and a DataFrame is created and plotted using native R plotting libraries.
+
+```scala
+val mxRnd = Matrices.symmetricUniformView(5000, 2, 1234)
+val drmRand = drmParallelize(mxRnd)
+
+val drmSin = drmRand.mapBlock() { case (keys, block) =>
+  val blockB = block.like()
+  for (i <- 0 until block.nrow) {
+    blockB(i, 0) = block(i, 0)
+    blockB(i, 1) = Math.sin(block(i, 0) * 8)
+  }
+  keys -> blockB
+}
+
+z.put("sinDrm", org.apache.mahout.math.drm.drmSampleToTSV(drmSin, 0.85))
+```
+
+And then in an R paragraph...
+
+```r
+%spark.r {"imageWidth": "400px"}
+
+library("ggplot2")
+
+sinStr = z.get("sinDrm")
+
+data <- read.table(text = sinStr, sep = "\t", header = FALSE)
+
+plot(data, col = "red")
+```
diff --git a/docs/interpreter/markdown.md b/docs/interpreter/markdown.md
index 21184dcf762..46fd1701ba3 100644
--- a/docs/interpreter/markdown.md
+++ b/docs/interpreter/markdown.md
@@ -1,9 +1,22 @@
 ---
 layout: page
-title: "Markdown Interpreter"
-description: "Markdown Interpreter"
-group: manual
+title: "Markdown Interpreter for Apache Zeppelin"
+description: "Markdown is a plain text formatting syntax designed so that it can be converted to HTML. Apache Zeppelin uses markdown4j."
+group: interpreter
 ---
+
{% include JB/setup %}
# Markdown Interpreter for Apache Zeppelin
@@ -12,14 +25,52 @@ group: manual
 ## Overview
 [Markdown](http://daringfireball.net/projects/markdown/) is a plain text formatting syntax designed so that it can be converted to HTML.
-Zeppelin uses markdown4j. For more examples and extension support, please checkout [here](https://code.google.com/p/markdown4j/).
+Apache Zeppelin uses [pegdown](https://github.com/sirthias/pegdown) and [markdown4j](https://github.com/jdcasey/markdown4j) as markdown parsers.
+
 In a Zeppelin notebook, you can use ` %md ` in the beginning of a paragraph to invoke the Markdown interpreter and generate static html from Markdown plain text.
-In Zeppelin, Markdown interpreter is enabled by default.
+In Zeppelin, the Markdown interpreter is enabled by default and uses the [pegdown](https://github.com/sirthias/pegdown) parser.
+
 ## Example
+The following example demonstrates the basic usage of Markdown in a Zeppelin notebook.
+
+## Mathematical expression
+
+The Markdown interpreter leverages the %html display system internally. That means you can mix mathematical expressions with markdown syntax. For more information, please see the [Mathematical Expression](../displaysystem/basicdisplaysystem.html#mathematical-expressions) section.
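+
+For instance, assuming the MathJax-style delimiters described in that section, a sketch like the following renders an inline formula next to ordinary markdown:
+
+```
+%md
+The roots of \\(ax^2 + bx + c = 0\\) are given by the quadratic formula.
+```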
+
+## Configuration
+
+| Name | Default Value | Description |
+| ---- | ------------- | ----------- |
+| markdown.parser.type | pegdown | Markdown parser type. Available values: pegdown, markdown4j |
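+
+For example, with the default `pegdown` parser a paragraph such as this sketch renders GitHub flavored markdown:
+
+```
+%md
+## Hello Zeppelin
+
+- **bold**, _italic_ and `inline code`
+- [Apache Zeppelin](https://zeppelin.apache.org)
+```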
+
+### Pegdown Parser
+
+The `pegdown` parser provides github flavored markdown.
+
+The `pegdown` parser also provides [YUML](http://yuml.me/) and [Websequence](https://www.websequencediagrams.com/) plugins.
+
+### Markdown4j Parser
+
+Since the pegdown parser is more accurate and supports much more markdown syntax, the `markdown4j` option might be removed later, but the parser is kept for backward compatibility.
+
diff --git a/docs/interpreter/pig.md b/docs/interpreter/pig.md
new file mode 100644
index 00000000000..d1f18fa8cb1
--- /dev/null
+++ b/docs/interpreter/pig.md
@@ -0,0 +1,148 @@
+---
+layout: page
+title: "Pig Interpreter for Apache Zeppelin"
+description: "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs."
+group: manual
+---
+{% include JB/setup %}
+
+# Pig Interpreter for Apache Zeppelin
+
+
+## Overview
+[Apache Pig](https://pig.apache.org/) is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turn enables them to handle very large data sets.
+
+## Supported interpreter type
+  - `%pig.script` (default Pig interpreter, so you can use `%pig`)
+
+    `%pig.script` is like the Pig grunt shell. Anything you can run in the Pig grunt shell can be run in the `%pig.script` interpreter. It is used for running Pig scripts where you don't need to visualize the data; it is suitable for data munging.
+
+  - `%pig.query`
+
+    `%pig.query` is a little different from `%pig.script`. It is used for exploratory data analysis via Pig Latin, where you can leverage Zeppelin's visualization ability. There are two minor differences in the last statement between `%pig.script` and `%pig.query`:
+    - No pig alias in the last statement in `%pig.query` (read the examples below).
+    - The last statement must be on a single line in `%pig.query`.
+
+## Supported runtime mode
+  - Local
+  - MapReduce
+  - Tez_Local (Only Tez 0.7 is supported)
+  - Tez (Only Tez 0.7 is supported)
+
+## How to use
+
+### How to setup Pig
+
+- Local Mode
+
+    Nothing needs to be done for local mode.
+
+- MapReduce Mode
+
+    HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
+
+- Tez Local Mode
+
+    Nothing needs to be done for tez local mode.
+
+- Tez Mode
+
+    HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR need to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
+
+### How to configure interpreter
+
+In the Interpreters menu, you have to create a new Pig interpreter. The Pig interpreter has the properties below by default, and you can set any Pig properties here, which will be passed to the Pig engine (like tez.queue.name and mapred.job.queue.name).
+Besides, we use the paragraph title as the job name if it exists, otherwise the last line of the Pig script is used. So you can use that to find the app running in the YARN RM UI.
+| Property | Default | Description |
+| -------- | ------- | ----------- |
+| zeppelin.pig.execType | mapreduce | Execution mode for the pig runtime: local, mapreduce, tez_local or tez |
+| zeppelin.pig.includeJobStats | false | Whether to display jobStats info in %pig.script |
+| zeppelin.pig.maxResult | 1000 | Max number of rows displayed in %pig.query |
+| tez.queue.name | default | Queue name for the tez engine |
+| mapred.job.queue.name | default | Queue name for the mapreduce engine |
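+
+For example, to run Pig on Tez against a specific queue, the interpreter setting might look like the following sketch (the queue name is hypothetical):
+
+```
+zeppelin.pig.execType   tez
+tez.queue.name          analytics
+```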
+
+### Example
+
+##### pig
+
+```
+%pig
+
+bankText = load 'bank.csv' using PigStorage(';');
+bank = foreach bankText generate $0 as age, $1 as job, $2 as marital, $3 as education, $5 as balance;
+bank = filter bank by age != '"age"';
+bank = foreach bank generate (int)age, REPLACE(job,'"','') as job, REPLACE(marital, '"', '') as marital, (int)(REPLACE(balance, '"', '')) as balance;
+store bank into 'clean_bank.csv' using PigStorage(';'); -- this statement is optional; it just shows that most of the time %pig.script is used for data munging before querying the data.
+```
+
+##### pig.query
+
+Get the number of each age where age is less than 30
+
+```
+%pig.query
+
+bank_data = filter bank by age < 30;
+b = group bank_data by age;
+foreach b generate group, COUNT($1);
+```
+
+The same as above, but use a dynamic text form so that the user can specify the variable maxAge in a textbox (see the screenshot below). Dynamic form is a very cool feature of Zeppelin; you can refer to this [link](../manual/dynamicform.html) for details.
+
+```
+%pig.query
+
+bank_data = filter bank by age < ${maxAge=40};
+b = group bank_data by age;
+foreach b generate group, COUNT($1) as count;
+```
+
+Get the number of each age for a specific marital type, also using a dynamic form here. The user can choose the marital type in the dropdown list (see the screenshot below).
+
+```
+%pig.query
+
+bank_data = filter bank by marital=='${marital=single,single|divorced|married}';
+b = group bank_data by age;
+foreach b generate group, COUNT($1) as count;
+```
+
+The above examples are in the Pig tutorial note in Zeppelin; you can check that for details. Here's the screenshot.
+
+Data is shared between `%pig` and `%pig.query`, so you can do some common work in `%pig` and then do different kinds of queries based on the data of `%pig`.
+Besides, we recommend that you specify an alias explicitly so that the visualization can display the column name correctly. In examples 2 and 3 of `%pig.query` above, we name `COUNT($1)` as `count`. If you don't do this, then we will name it using its position, e.g. in the first `%pig.query` example above, `col_1` would be used in the chart to represent `COUNT($1)`.
+
diff --git a/docs/interpreter/postgresql.md b/docs/interpreter/postgresql.md
index 107fda1d809..52dac169c6f 100644
--- a/docs/interpreter/postgresql.md
+++ b/docs/interpreter/postgresql.md
@@ -1,217 +1,29 @@
----
-layout: page
-title: "PostgreSQL and HAWQ Interpreter"
-description: ""
-group: manual
----
-{% include JB/setup %}
-
-# PostgreSQL, Apache HAWQ (incubating) Interpreter for Apache Zeppelin
-
-
-## Important Notice
-
-Postgresql Interpreter will be deprecated and merged into the JDBC Interpreter. You can use Postgresql through the JDBC Interpreter with the same functionality. See the example below of settings and dependencies.
-
-### Properties
-
-| Property | Value |
-| -------- | ----- |
-| psql.driver | org.postgresql.Driver |
-| psql.url | jdbc:postgresql://localhost:5432/ |
-| psql.user | psqlUser |
-| psql.password | psqlPassword |
-
-### Dependencies
-
-| Artifact | Exclude |
-| -------- | ------- |
-| org.postgresql:postgresql:9.4-1201-jdbc41 | |
----
-
-## Overview
-
-[zeppelin-view](https://www.youtube.com/watch?v=wqXXQhJ5Uk8)
-
-This interpreter seamlessly supports the following SQL data processing engines:
-
-* [PostgreSQL](http://www.postgresql.org/) - OSS, Object-relational database management system (ORDBMS)
-* [Apache HAWQ (incubating)](http://hawq.incubator.apache.org/) - Powerful open source SQL-On-Hadoop engine.
-* [Greenplum](http://pivotal.io/big-data/pivotal-greenplum-database) - MPP database built on open source PostgreSQL.
-
-This [Video Tutorial](https://www.youtube.com/watch?v=wqXXQhJ5Uk8) illustrates some of the features provided by the `Postgresql Interpreter`.
-
-| Name | Class | Description |
-| ---- | ----- | ----------- |
-| %psql.sql | PostgreSqlInterpreter | Provides a SQL environment for PostgreSQL, HAWQ and Greenplum |
-
-## Create Interpreter
-By default Zeppelin creates one `PSQL` instance. You can remove it or create new instances.
-
-Multiple PSQL instances can be created, each configured to the same or different backend databases. But over time a `Notebook` can have only one PSQL interpreter instance `bound`. That means you _cannot_ connect to different databases in the same `Notebook`. This is a known Zeppelin limitation.
-
-To create a new PSQL instance open the `Interpreter` section and click the `+Create` button. Pick a `Name` of your choice and from the `Interpreter` drop-down select `psql`. Then follow the configuration instructions and `Save` the new instance.
-
-> Note: The `Name` of the instance is used only to distinguish the instances while binding them to the `Notebook`. The `Name` is irrelevant inside the `Notebook`. In the `Notebook` you must use the `%psql.sql` tag.
-
-## Bind to Notebook
-In the `Notebook` click on the `settings` icon in the top right corner. Then select/deselect the interpreters to be bound with the `Notebook`.
-
-## Configuration
-You can modify the configuration of the PSQL interpreter from the `Interpreter` section. The PSQL interpreter exposes the following properties:
-
-| Property Name | Description | Default Value |
-| ------------- | ----------- | ------------- |
-| postgresql.url | JDBC URL to connect to | jdbc:postgresql://localhost:5432 |
-| postgresql.user | JDBC user name | gpadmin |
-| postgresql.password | JDBC password | |
-| postgresql.driver.name | JDBC driver name. In this version the driver name is fixed and should not be changed | org.postgresql.Driver |
-| postgresql.max.result | Max number of SQL results to display to prevent browser overload | 1000 |
    - -## How to use -``` -Tip: Use (CTRL + .) for SQL auto-completion. -``` - -### DDL and SQL commands -Start the paragraphs with the full `%psql.sql` prefix tag! The short notation: `%psql` would still be able run the queries but the syntax highlighting and the auto-completions will be disabled. - -You can use the standard CREATE / DROP / INSERT commands to create or modify the data model: - -```sql -%psql.sql -drop table if exists mytable; -create table mytable (i int); -insert into mytable select generate_series(1, 100); -``` - -Then in a separate paragraph run the query. - -```sql -%psql.sql -select * from mytable; -``` - -> Note: You can have multiple queries in the same paragraph but only the result from the first is displayed. [[1](https://issues.apache.org/jira/browse/ZEPPELIN-178)], [[2](https://issues.apache.org/jira/browse/ZEPPELIN-212)]. - -For example, this will execute both queries but only the count result will be displayed. If you revert the order of the queries the mytable content will be shown instead. - -```sql -%psql.sql -select count(*) from mytable; -select * from mytable; -``` - -### PSQL command line tools -Use the Shell Interpreter (`%sh`) to access the command line [PSQL](http://www.postgresql.org/docs/9.4/static/app-psql.html) interactively: - -```bash -%sh -psql -h phd3.localdomain -U gpadmin -p 5432 < +{% include JB/setup %} + +# PostgreSQL, Apache HAWQ (incubating) Interpreter for Apache Zeppelin + +
+
+## Important Notice
+
+The Postgresql interpreter is deprecated and merged into the [JDBC Interpreter](./jdbc.html). You can use it through the JDBC Interpreter with the same functionality. See the [Postgresql setting example](./jdbc.html#postgres) for more detailed information.
diff --git a/docs/interpreter/python.md b/docs/interpreter/python.md
index d43449369e9..8d5d4b85062 100644
--- a/docs/interpreter/python.md
+++ b/docs/interpreter/python.md
@@ -1,9 +1,22 @@
 ---
 layout: page
-title: "Python Interpreter"
-description: "Python Interpreter"
-group: manual
+title: "Python 2 & 3 Interpreter for Apache Zeppelin"
+description: "Python is a programming language that lets you work quickly and integrate systems more effectively."
+group: interpreter
 ---
+
{% include JB/setup %}
# Python 2 & 3 Interpreter for Apache Zeppelin
@@ -18,12 +31,17 @@ group: manual
+| Property | Default | Description |
+| -------- | ------- | ----------- |
+| zeppelin.python | python | Path of the already installed Python binary (could be python2 or python3). If python is not in your $PATH you can set the absolute directory (example: /usr/bin/python) |
+| zeppelin.python.maxResult | 1000 | Max number of dataframe rows to display |
 ## Enabling Python Interpreter
@@ -38,10 +56,64 @@ The interpreter can only work if you already have python installed (the interpre
 To access the help, type **help()**
-## Python modules
+## Python environments
+
+### Default
+By default, PythonInterpreter will use the python command defined in the `zeppelin.python` property to run the python process.
 The interpreter can use all modules already installed (with pip, easy_install...)
-## Use Zeppelin Dynamic Forms
+### Conda
+[Conda](http://conda.pydata.org/) is a package management system and environment management system for python.
+The `%python.conda` interpreter lets you change between environments.
+
+#### Usage
+
+List your environments
+
+```
+%python.conda
+```
+
+Activate an environment
+
+```
+%python.conda activate [ENVIRONMENT_NAME]
+```
+
+Deactivate
+
+```
+%python.conda deactivate
+```
+
+### Docker
+
+The `%python.docker` interpreter allows PythonInterpreter to create the python process in a specified docker container.
+
+#### Usage
+
+Activate an environment
+
+```
+%python.docker activate [Repository]
+%python.docker activate [Repository:Tag]
+%python.docker activate [Image Id]
+```
+
+Deactivate
+
+```
+%python.docker deactivate
+```
+
+Example
+
+```
+# activate latest tensorflow image as a python environment
+%python.docker activate gcr.io/tensorflow/tensorflow:latest
+```
+
+## Using Zeppelin Dynamic Forms
 You can leverage [Zeppelin Dynamic Form]({{BASE_PATH}}/manual/dynamicform.html) inside your Python code.
 **Zeppelin Dynamic Form can only be used if the py4j Python library is installed in your system. If not, you can install it with `pip install py4j`.**
@@ -60,16 +132,27 @@
 print (z.select("f1",[("o1","1"),("o2","2")],"2"))
 print("".join(z.checkbox("f3", [("o1","1"), ("o2","2")],["1"])))
 ```
-## Zeppelin features not fully supported by the Python Interpreter
-
-* Interrupt a paragraph execution (`cancel()` method) is currently only supported in Linux and MacOs. If interpreter runs in another operating system (for instance MS Windows), interrupt a paragraph will close the whole interpreter. A JIRA ticket ([ZEPPELIN-893](https://issues.apache.org/jira/browse/ZEPPELIN-893)) is opened to implement this feature in a next release of the interpreter.
-* Progression bar in webUI (`getProgress()` method) is currently not implemented.
-* Code-completion is currently not implemented.
- ## Matplotlib integration
- The python interpreter can display matplotlib graph with the function `z.show()`.
- You need to have matplotlib module installed and a XServer running to use this functionality !
+
+## Matplotlib integration
+
+The python interpreter can display matplotlib figures inline automatically using the `pyplot` module:
+```python
+%python
+import matplotlib.pyplot as plt
+plt.plot([1, 2, 3])
+```
+This is the recommended method for using matplotlib from within a Zeppelin notebook. The output of this command will by default be converted to HTML by implicitly making use of the `%html` magic. Additional configuration can be achieved using the builtin `z.configure_mpl()` method. For example,
+
+```python
+z.configure_mpl(width=400, height=300, fmt='svg')
+plt.plot([1, 2, 3])
+```
+
+This will produce a 400x300 image in SVG format; by default the dimensions are 600x400 and the format is PNG, respectively. In the future, another option called `angular` can be used to make it possible to update a plot produced from one paragraph directly from another (the output will be `%angular` instead of `%html`). However, this feature is already available in the `pyspark` interpreter. More details can be found in the included "Zeppelin Tutorial: Python - matplotlib basic" tutorial notebook.
+
+If Zeppelin cannot find the matplotlib backend files (which should usually be found in `$ZEPPELIN_HOME/interpreter/lib/python`) in your `PYTHONPATH`, then the backend will automatically be set to agg, and the (otherwise deprecated) instructions below can be used for more limited inline plotting.
+
+If you are unable to load the inline backend, use `z.show(plt)`:
```python
%python
import matplotlib.pyplot as plt
plt.figure()
z.show(plt)
plt.close()
```
-z.show function can take optional parameters to adapt graph width and height
+The `z.show()` function can take optional parameters to adapt graph dimensions (width and height) as well as output format (png or optionally svg).
```python
%python
z.show(plt, width='50px')
-z.show(plt, height='150px')
+z.show(plt, height='150px', fmt='svg')
```
 ## Pandas integration
-[Zeppelin Display System]({{BASE_PATH}}/displaysystem/basicdisplaysystem.html#table) provides simple API to visualize data in Pandas DataFrames, same as in Matplotlib.
+Apache Zeppelin [Table Display System](../displaysystem/basicdisplaysystem.html#table) provides built-in data visualization capabilities. The Python interpreter leverages it to visualize Pandas DataFrames through the same `z.show()` API as the [Matplotlib integration](#matplotlib-integration).
 Example:
```python
rates = pd.read_csv("bank.csv", sep=";")
z.show(rates)
```
+
+## SQL over Pandas DataFrames
+
+There is a convenience `%python.sql` interpreter that matches the Apache Spark experience in Zeppelin and enables the use of SQL to query [Pandas DataFrames](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html), with visualization of results through the built-in [Table Display System](../displaysystem/basicdisplaysystem.html#table).
+
+**Prerequisites**
+
+ - Pandas `pip install pandas`
+ - PandaSQL `pip install -U pandasql`
+
+If the default bound interpreter is Python (first in the interpreter list, under the _Gear Icon_), you can just use it as `%sql`, i.e.
+
+ - first paragraph
+
+  ```python
+import pandas as pd
+rates = pd.read_csv("bank.csv", sep=";")
+  ```
+
+ - next paragraph
+
+  ```sql
+%sql
+SELECT * FROM rates WHERE age < 40
+  ```
+
+Otherwise it can be referred to as `%python.sql`
+
 ## Technical description
-For in-depth technical details on current implementation plese reffer [python/README.md](https://github.com/apache/zeppelin/blob/master/python/README.md).
+For in-depth technical details on the current implementation please refer to [python/README.md](https://github.com/apache/zeppelin/blob/master/python/README.md).
+
+### Some features not yet implemented in the Python Interpreter
+
+* Interrupting a paragraph execution (the `cancel()` method) is currently only supported on Linux and MacOS. If the interpreter runs in another operating system (for instance MS Windows), interrupting a paragraph will close the whole interpreter. A JIRA ticket ([ZEPPELIN-893](https://issues.apache.org/jira/browse/ZEPPELIN-893)) is open to implement this feature in a next release of the interpreter.
+* The progress bar in the web UI (the `getProgress()` method) is currently not implemented.
+* Code completion is currently not implemented.
+
diff --git a/docs/interpreter/r.md b/docs/interpreter/r.md
index c03e548cabf..2b224917ceb 100644
--- a/docs/interpreter/r.md
+++ b/docs/interpreter/r.md
@@ -1,9 +1,22 @@
 ---
 layout: page
-title: "R Interpreter"
-description: ""
-group: manual
+title: "R Interpreter for Apache Zeppelin"
+description: "R is a free software environment for statistical computing and graphics."
+group: interpreter
 ---
+
{% include JB/setup %}
# R Interpreter for Apache Zeppelin
diff --git a/docs/interpreter/scalding.md b/docs/interpreter/scalding.md
index e8774df67fa..22027f22dee 100644
--- a/docs/interpreter/scalding.md
+++ b/docs/interpreter/scalding.md
@@ -1,9 +1,22 @@
 ---
 layout: page
-title: "Scalding Interpreter"
-description: ""
-group: manual
+title: "Scalding Interpreter for Apache Zeppelin"
+description: "Scalding is an open source Scala library for writing MapReduce jobs."
+group: interpreter
 ---
+
{% include JB/setup %}
# Scalding Interpreter for Apache Zeppelin
diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md
new file mode 100644
index 00000000000..cb8d1278ec0
--- /dev/null
+++ b/docs/interpreter/scio.md
@@ -0,0 +1,169 @@
+---
+layout: page
+title: "Scio Interpreter for Apache Zeppelin"
+description: "Scio is a Scala DSL for Apache Beam/Google Dataflow model."
+group: interpreter
+---
+
+{% include JB/setup %}
+
+# Scio Interpreter for Apache Zeppelin
+
+
+## Overview
+Scio is a Scala DSL for [Google Cloud Dataflow](https://github.com/GoogleCloudPlatform/DataflowJavaSDK) and [Apache Beam](http://beam.incubator.apache.org/) inspired by [Spark](http://spark.apache.org/) and [Scalding](https://github.com/twitter/scalding). See the current [wiki](https://github.com/spotify/scio/wiki) and [API documentation](http://spotify.github.io/scio/) for more information.
+
+## Configuration
+| Name | Default Value | Description |
+| ---- | ------------- | ----------- |
+| zeppelin.scio.argz | --runner=InProcessPipelineRunner | Scio interpreter wide arguments. Documentation: https://github.com/spotify/scio/wiki#options and https://cloud.google.com/dataflow/pipelines/specifying-exec-params |
+| zeppelin.scio.maxResult | 1000 | Max number of SCollection results to display |
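+
+For example, to run pipelines on Google Cloud Dataflow instead of the default in-process runner, `zeppelin.scio.argz` might be set to something like the following sketch (the project and bucket are hypothetical placeholders):
+
+```
+--runner=DataflowPipelineRunner --project=my-gcp-project --stagingLocation=gs://my-bucket/staging
+```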
+
+## Enabling the Scio Interpreter
+
+In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and select **beam** (**beam.scio**).
+
+## Using the Scio Interpreter
+
+In a paragraph, use `%beam.scio` to select the **Scio** interpreter. You can use it much the same way as the vanilla Scala REPL and the [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports, execution etc.) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from the Scio interpreter settings. The easiest way to proceed is to create a Scio context via the standard `ContextAndArgs`.
+
+```scala
+%beam.scio
+val (sc, args) = ContextAndArgs(argz)
+```
+
+Use the `sc` context the way you would in a regular pipeline/REPL.
+
+Example:
+
+```scala
+%beam.scio
+val (sc, args) = ContextAndArgs(argz)
+sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay()
+```
+
+If you close the Scio context, go ahead and create a new one using `ContextAndArgs`. Please refer to the [Scio wiki](https://github.com/spotify/scio/wiki) for more complex examples. You can close the Scio context much the same way as in the Scio REPL, and use Zeppelin display helpers to synchronously close and display results - read more below.
+
+### Progress
+
+There can be only one paragraph running at once. There is no notion of overall progress, thus the progress bar will show `0`.
+
+### SCollection display helpers
+
+The Scio interpreter comes with display helpers to ease working with Zeppelin notebooks. Simply use `closeAndDisplay()` on an `SCollection` to close the context and display the results. The number of results is limited by `zeppelin.scio.maxResult` (by default 1000).
+
+Supported `SCollection` types:
+
+ * Scio's typed BigQuery
+ * Scala's Products (case classes, tuples)
+ * Google BigQuery's TableRow
+ * Apache Avro
+ * All Scala `AnyVal`s
+
+#### Helper methods
+
+There are different helper methods for different objects. You can easily display results from `SCollection`, `Future[Tap]` and `Tap`.
+
+##### `SCollection` helper
+
+`SCollection` has a `closeAndDisplay` Zeppelin helper method for the types listed above. Use it to synchronously close the Scio context and, once available, pull and display the results.
+
+##### `Future[Tap]` helper
+
+`Future[Tap]` has a `waitAndDisplay` Zeppelin helper method for the types listed above. Use it to synchronously wait for the results and, once available, pull and display them.
+
+##### `Tap` helper
+
+`Tap` has a `display` Zeppelin helper method for the types listed above. Use it to pull and display the results.
+
+### Examples
+
+#### BigQuery example:
+
+```scala
+%beam.scio
+@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
+                           |FROM [bigquery-samples:airline_ontime_data.flights]
+                           |group by departure_airport
+                           |order by 2 desc
+                           |limit 10""".stripMargin) class Flights
+
+val (sc, args) = ContextAndArgs(argz)
+sc.bigQuerySelect(Flights.query).closeAndDisplay(Flights.schema)
+```
+
+#### BigQuery typed example:
+
+```scala
+%beam.scio
+@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
+                           |FROM [bigquery-samples:airline_ontime_data.flights]
+                           |group by departure_airport
+                           |order by 2 desc
+                           |limit 10""".stripMargin) class Flights
+
+val (sc, args) = ContextAndArgs(argz)
+sc.typedBigQuery[Flights]().flatMap(_.no_of_delays).mean.closeAndDisplay()
+```
+
+#### Avro example:
+
+```scala
+%beam.scio
+import com.spotify.data.ExampleAvro
+
+val (sc, args) = ContextAndArgs(argz)
+sc.avroFile[ExampleAvro]("gs:///tmp/my.avro").take(10).closeAndDisplay()
+```
+
+#### Avro example with a view schema:
+
+```scala
+%beam.scio
+import com.spotify.data.ExampleAvro
+import org.apache.avro.Schema
+
+val (sc, args) = ContextAndArgs(argz)
+val view = Schema.parse("""{"type":"record","name":"ExampleAvro","namespace":"com.spotify.data","fields":[{"name":"track","type":"string"}, {"name":"artist", "type":"string"}]}""")
+
+sc.avroFile[ExampleAvro]("gs:///tmp/my.avro").take(10).closeAndDisplay(view)
+```
+
+### Google credentials
+
+The Scio Interpreter will try to infer your Google Cloud credentials from its environment, taking into account:
+
+ * `argz` interpreter settings ([doc](https://github.com/spotify/scio/wiki#options))
+ * environment variable (`GOOGLE_APPLICATION_CREDENTIALS`)
+ * gcloud configuration
+
+#### BigQuery macro credentials
+
+Currently the BigQuery project for macro expansion is inferred using Google Dataflow's [DefaultProjectFactory().create()](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java#L187)
diff --git a/docs/interpreter/shell.md b/docs/interpreter/shell.md
new file mode 100644
index 00000000000..b4b36dd55cc
--- /dev/null
+++ b/docs/interpreter/shell.md
@@ -0,0 +1,68 @@
+---
+layout: page
+title: "Shell interpreter for Apache Zeppelin"
+description: "Shell interpreter uses Apache Commons Exec to execute external processes."
+group: interpreter
+---
+
+{% include JB/setup %}
+
+# Shell interpreter for Apache Zeppelin
+
+
+## Overview
+The Shell interpreter uses [Apache Commons Exec](https://commons.apache.org/proper/commons-exec) to execute external processes.
+In a Zeppelin notebook, you can use ` %sh ` in the beginning of a paragraph to invoke the system shell and run commands.
+
+> **Note :** Currently each command runs as the user the Zeppelin server is running as.
+
+## Configuration
+At the "Interpreters" menu in the Zeppelin dropdown menu, you can set the property values for the Shell interpreter.
+| Name | Value | Description |
+| ---- | ----- | ----------- |
+| shell.command.timeout.millisecs | 60000 | Shell command timeout in milliseconds |
+| zeppelin.shell.auth.type | | Types of authentication methods supported: SIMPLE and KERBEROS |
+| zeppelin.shell.principal | | The principal name to load from the keytab |
+| zeppelin.shell.keytab.location | | The path to the keytab file |
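+
+For example, a Kerberos-enabled setup might use values like the following sketch (the principal and keytab path are hypothetical):
+
+```
+zeppelin.shell.auth.type        KERBEROS
+zeppelin.shell.principal        zeppelin@EXAMPLE.COM
+zeppelin.shell.keytab.location  /etc/security/keytabs/zeppelin.keytab
+```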
    + +## Example +The following example demonstrates the basic usage of Shell in a Zeppelin notebook. + + + +If you need further information about **Zeppelin Interpreter Setting** for using Shell interpreter, please read [What is interpreter setting?](../manual/interpreters.html#what-is-interpreter-setting) section first. \ No newline at end of file diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md index a183033bf7c..59b3430584d 100644 --- a/docs/interpreter/spark.md +++ b/docs/interpreter/spark.md @@ -1,21 +1,32 @@ --- layout: page -title: "Spark Interpreter Group" -description: "" -group: manual +title: "Apache Spark Interpreter for Apache Zeppelin" +description: "Apache Spark is a fast and general-purpose cluster computing system. It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution engine." +group: interpreter --- + {% include JB/setup %} - # Spark Interpreter for Apache Zeppelin
## Overview
[Apache Spark](http://spark.apache.org) is a fast and general-purpose cluster computing system.
-It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs
-Apache Spark is supported in Zeppelin with
-Spark Interpreter group, which consists of five interpreters.
+It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs.
+Apache Spark is supported in Zeppelin with the Spark interpreter group, which consists of the five interpreters below.
+| Name | Class | Description |
+| ---- | ----- | ----------- |
+| %spark | SparkInterpreter | Creates a SparkContext and provides a Scala environment |
+| %spark.pyspark | PySparkInterpreter | Provides a Python environment |
+| %spark.r | SparkRInterpreter | Provides an R environment with SparkR support |
+| %spark.sql | SparkSQLInterpreter | Provides a SQL environment |
+| %spark.dep | DepInterpreter | Dependency loader |
@@ -79,7 +90,7 @@ You can also set other Spark properties which are not listed in the table. For a
+| Property | Default | Description |
+| -------- | ------- | ----------- |
+| spark.executor.memory | 1g | Executor memory per worker instance. ex) 512m, 32g |
+| zeppelin.dep.localrepo | local-repo | Local repository for dependency loader |
+| PYSPARK_PYTHON | python | Python binary executable to use for PySpark in both driver and workers (default is python). Property spark.pyspark.python takes precedence if it is set |
+| PYSPARK_DRIVER_PYTHON | python | Python binary executable to use for PySpark in driver only (default is PYSPARK_PYTHON). Property spark.pyspark.driver.python takes precedence if it is set |
+| zeppelin.spark.concurrentSQL | false | Execute multiple SQL concurrently if set true |
+| zeppelin.spark.maxResult | 1000 | Max number of Spark SQL results to display |
+| zeppelin.spark.printREPLOutput | true | Print REPL output |
+| zeppelin.spark.importImplicit | true | Import implicits, UDF collection, and sql if set true |
+| zeppelin.spark.enableSupportedVersionCheck | true | Do not change - developer only setting, not for production use |
Without any configuration, the Spark interpreter works out of the box in local mode. But if you want to connect to your Spark cluster, you'll need to follow the two simple steps below.

### 1. Export SPARK_HOME
-In **conf/zeppelin-env.sh**, export `SPARK_HOME` environment variable with your Spark installation path.
+In `conf/zeppelin-env.sh`, export `SPARK_HOME` environment variable with your Spark installation path.

-for example
+For example,

```bash
export SPARK_HOME=/usr/lib/spark
```

-You can optionally export HADOOP\_CONF\_DIR and SPARK\_SUBMIT\_OPTIONS
+You can optionally set more environment variables

```bash
+# set hadoop conf dir
export HADOOP_CONF_DIR=/usr/lib/hadoop
+
+# set options to pass spark-submit command
export SPARK_SUBMIT_OPTIONS="--packages com.databricks:spark-csv_2.10:1.2.0"
+
+# extra classpath. e.g. set classpath for hive-site.xml
+export ZEPPELIN_INTP_CLASSPATH_OVERRIDES=/etc/hive/conf
```

-For Windows, ensure you have `winutils.exe` in `%HADOOP_HOME%\bin`. For more details please see [Problems running Hadoop on Windows](https://wiki.apache.org/hadoop/WindowsProblems)
+For Windows, ensure you have `winutils.exe` in `%HADOOP_HOME%\bin`. Please see [Problems running Hadoop on Windows](https://wiki.apache.org/hadoop/WindowsProblems) for details.

### 2. Set master in Interpreter menu
After starting Zeppelin, go to the **Interpreter** menu and edit the **master** property in your Spark interpreter setting. The value may vary depending on your Spark cluster deployment type.

-for example,
+For example,

* **local[*]** in local mode
* **spark://master:7077** in standalone cluster
* **yarn-client** in Yarn client mode
* **mesos://host:5050** in Mesos cluster

-That's it. Zeppelin will work with any version of Spark and any deployment type without rebuilding Zeppelin in this way. (Zeppelin 0.5.6-incubating release works up to Spark 1.6.1 )
+That's it. Zeppelin will work with any version of Spark and any deployment type without rebuilding Zeppelin in this way.
+For further information about Spark & Zeppelin version compatibility, please refer to the "Available Interpreters" section in the [Zeppelin download page](https://zeppelin.apache.org/download.html).

> Note that without exporting `SPARK_HOME`, it's running in local mode with the included version of Spark. The included version may vary depending on the build profile.

-## SparkContext, SQLContext, ZeppelinContext
-SparkContext, SQLContext, ZeppelinContext are automatically created and exposed as variable names 'sc', 'sqlContext' and 'z', respectively, both in scala and python environments.
+## SparkContext, SQLContext, SparkSession, ZeppelinContext
+SparkContext, SQLContext and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext` and `z`, respectively, in Scala, Python and R environments.
+Starting from 0.6.1, SparkSession is available as the variable `spark` when you are using Spark 2.x.

-> Note that scala / python environment shares the same SparkContext, SQLContext, ZeppelinContext instance.
+> Note that the Scala/Python/R environments share the same SparkContext, SQLContext and ZeppelinContext instance.

## Dependency Management
-There are two ways to load external library in spark interpreter. First is using Interpreter setting menu and second is loading Spark properties.
+There are two ways to load external libraries in the Spark interpreter. The first is using the interpreter setting menu and the second is loading Spark properties.
### 1. Setting Dependencies via Interpreter Setting
Please see [Dependency Management](../manual/dependencymanagement.html) for the details.

### 2. Loading Spark Properties
-Once `SPARK_HOME` is set in `conf/zeppelin-env.sh`, Zeppelin uses `spark-submit` as spark interpreter runner. `spark-submit` supports two ways to load configurations. The first is command line options such as --master and Zeppelin can pass these options to `spark-submit` by exporting `SPARK_SUBMIT_OPTIONS` in conf/zeppelin-env.sh. Second is reading configuration options from `SPARK_HOME/conf/spark-defaults.conf`. Spark properites that user can set to distribute libraries are:
+Once `SPARK_HOME` is set in `conf/zeppelin-env.sh`, Zeppelin uses `spark-submit` as the spark interpreter runner. `spark-submit` supports two ways to load configurations.
+The first is command line options such as --master, and Zeppelin can pass these options to `spark-submit` by exporting `SPARK_SUBMIT_OPTIONS` in `conf/zeppelin-env.sh`. The second is reading configuration options from `SPARK_HOME/conf/spark-defaults.conf`. Spark properties that the user can set to distribute libraries are:
+| spark-defaults.conf | SPARK_SUBMIT_OPTIONS | Description |
+| ------------------- | -------------------- | ----------- |
+| spark.jars | --jars | Comma-separated list of local jars to include on the driver and executor classpaths. |
+| spark.jars.packages | --packages | Comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. Will search the local maven repo, then maven central and any additional remote repositories given by --repositories. The format for the coordinates should be groupId:artifactId:version. |
+| spark.files | --files | Comma-separated list of files to be placed in the working directory of each executor. |
-> Note that adding jar to pyspark is only availabe via `%dep` interpreter at the moment.

Here are a few examples:

-* SPARK_SUBMIT_OPTIONS in conf/zeppelin-env.sh
+* `SPARK_SUBMIT_OPTIONS` in `conf/zeppelin-env.sh`
+
  ```bash
export SPARK_SUBMIT_OPTIONS="--packages com.databricks:spark-csv_2.10:1.2.0 --jars /path/mylib1.jar,/path/mylib2.jar --files /path/mylib1.py,/path/mylib2.zip,/path/mylib3.egg"
+  ```
+
+* `SPARK_HOME/conf/spark-defaults.conf`
-* SPARK_HOME/conf/spark-defaults.conf
+
  ```
spark.jars        /path/mylib1.jar,/path/mylib2.jar
spark.jars.packages   com.databricks:spark-csv_2.10:1.2.0
spark.files       /path/mylib1.py,/path/mylib2.egg,/path/mylib3.zip
+  ```

-### 3. Dynamic Dependency Loading via %dep interpreter
-> Note: `%dep` interpreter is deprecated since v0.6.0.
-`%dep` interpreter load libraries to `%spark` and `%pyspark` but not to `%spark.sql` interpreter so we recommend you to use first option instead.
+### 3. Dynamic Dependency Loading via %spark.dep interpreter
+> Note: the `%spark.dep` interpreter loads libraries to `%spark` and `%spark.pyspark` but not to the `%spark.sql` interpreter. So we recommend you to use the first option instead.

-When your code requires external library, instead of doing download/copy/restart Zeppelin, you can easily do following jobs using `%dep` interpreter.
+When your code requires an external library, instead of doing download/copy/restart Zeppelin, you can easily do the following jobs using the `%spark.dep` interpreter.

- * Load libraries recursively from Maven repository
+ * Load libraries recursively from maven repository
 * Load libraries from local filesystem
 * Add additional maven repository
 * Automatically add libraries to SparkCluster (You can turn off)

-Dep interpreter leverages scala environment. So you can write any Scala code here.
-Note that `%dep` interpreter should be used before `%spark`, `%pyspark`, `%sql`.
+The Dep interpreter leverages the Scala environment. So you can write any Scala code here.
+Note that the `%spark.dep` interpreter should be used before `%spark`, `%spark.pyspark`, `%spark.sql`.

Here's the usage.

```scala
-%dep
+%spark.dep
z.reset() // clean up previously added artifact and repository

// add maven repository
@@ -265,11 +295,11 @@
z.load("groupId:artifactId:version").local()
```

## ZeppelinContext
-Zeppelin automatically injects ZeppelinContext as variable 'z' in your scala/python environment. ZeppelinContext provides some additional functions and utility.
+Zeppelin automatically injects `ZeppelinContext` as variable `z` in your Scala/Python environment. `ZeppelinContext` provides some additional functions and utilities.

### Object Exchange
-ZeppelinContext extends map and it's shared between scala, python environment.
-So you can put some object from scala and read it from python, vise versa.
+`ZeppelinContext` extends map and it's shared between the Scala and Python environments.
+So you can put some objects from Scala and read them from Python, and vice versa.
    @@ -279,6 +309,12 @@ So you can put some object from scala and read it from python, vise versa. %spark val myObject = ... z.put("objName", myObject) + +// Exchanging data frames +myScalaDataFrame = ... +z.put("myScalaDataFrame", myScalaDataFrame) + +val myPythonDataFrame = z.get("myPythonDataFrame").asInstanceOf[DataFrame] {% endhighlight %}
@@ -286,8 +322,14 @@ z.put("objName", myObject)
 {% highlight python %}
 # Get object from python
-%pyspark
+%spark.pyspark
 myObject = z.get("objName")
+
+# Exchanging data frames
+myPythonDataFrame = ...
+z.put("myPythonDataFrame", myPythonDataFrame._jdf)
+
+myScalaDataFrame = DataFrame(z.get("myScalaDataFrame"), sqlContext)
 {% endhighlight %}
    @@ -295,8 +337,8 @@ myObject = z.get("objName") ### Form Creation -ZeppelinContext provides functions for creating forms. -In scala and python environments, you can create forms programmatically. +`ZeppelinContext` provides functions for creating forms. +In Scala and Python environments, you can create forms programmatically.
    @@ -321,7 +363,7 @@ z.select("formName", "option1", Seq(("option1", "option1DisplayName"),
{% highlight python %}
-%pyspark
+%spark.pyspark

# Create text input form
z.input("formName")
@@ -342,17 +384,22 @@

In the SQL environment, you can create forms from a simple template.

-```
-%sql
+```sql
+%spark.sql
select * from ${table=defaultTableName} where text like '%${search}%'
```

To learn more about dynamic forms, check out [Dynamic Form](../manual/dynamicform.html).

+## Matplotlib Integration (pyspark)
+Both the `python` and `pyspark` interpreters have built-in support for inline visualization using `matplotlib`, a popular plotting library for python. More details can be found in the [python interpreter documentation](../interpreter/python.html), since matplotlib support is identical. More advanced interactive plotting can be done with pyspark through utilizing Zeppelin's built-in [Angular Display System](../displaysystem/back-end-angular.html), as shown below:
+

## Interpreter setting option
-Interpreter setting can choose one of 'shared', 'scoped', 'isolated' option. Spark interpreter creates separate scala compiler per each notebook but share a single SparkContext in 'scoped' mode (experimental). It creates separate SparkContext per each notebook in 'isolated' mode.
+You can choose one of the `shared`, `scoped` and `isolated` options when you configure the Spark interpreter. The Spark interpreter creates a separate Scala compiler per notebook but shares a single SparkContext in `scoped` mode (experimental). It creates a separate SparkContext per notebook in `isolated` mode.

## Setting up Zeppelin with Kerberos

Logical setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on YARN:

1. On the server that Zeppelin is installed, install Kerberos client modules and configuration, krb5.conf. This is to make the server communicate with the KDC.

-2. Set SPARK\_HOME in `[ZEPPELIN\_HOME]/conf/zeppelin-env.sh` to use spark-submit
-(Additionally, you might have to set `export HADOOP\_CONF\_DIR=/etc/hadoop/conf`)
+2. Set `SPARK_HOME` in `[ZEPPELIN_HOME]/conf/zeppelin-env.sh` to use spark-submit
+(Additionally, you might have to set `export HADOOP_CONF_DIR=/etc/hadoop/conf`)

-3. Add the two properties below to spark configuration (`[SPARK_HOME]/conf/spark-defaults.conf`):
+3. Add the two properties below to the Spark configuration (`[SPARK_HOME]/conf/spark-defaults.conf`):

    spark.yarn.principal
    spark.yarn.keytab

-  > **NOTE:** If you do not have access to the above spark-defaults.conf file, optionally, you may add the lines to the Spark Interpreter through the Interpreter tab in the Zeppelin UI.
+  > **NOTE:** If you do not have permission to access the above spark-defaults.conf file, optionally, you can add the above lines to the Spark Interpreter setting through the Interpreter tab in the Zeppelin UI.

4. That's it. Play with Zeppelin!
diff --git a/docs/manual/dependencymanagement.md b/docs/manual/dependencymanagement.md
index acf6002e8f2..44068dae1fd 100644
--- a/docs/manual/dependencymanagement.md
+++ b/docs/manual/dependencymanagement.md
@@ -1,7 +1,7 @@
 ---
 layout: page
-title: "Dependency Management"
-description: ""
+title: "Dependency Management for Apache Spark Interpreter"
+description: "Include external libraries to Apache Spark Interpreter by setting dependencies in interpreter menu."
 group: manual
 ---
+{% include JB/setup %}
+
+# Interpreter Execution Hooks (Experimental)
+
+
+## Overview
+
+Apache Zeppelin allows users to specify additional code to be executed by an interpreter at pre- and post-paragraph code execution.
+This is primarily useful if you need to run the same set of code for all of the paragraphs within your notebook at specific times.
+Currently, this feature is only available for the spark and pyspark interpreters.
+To specify your hook code, you may use `z.registerHook()`.
+For example, enter the following into one paragraph:
+
+```python
+%pyspark
+z.registerHook("pre_exec", "print 'This code should be executed before the paragraph code!'")
+z.registerHook("post_exec", "print 'This code should be executed after the paragraph code!'")
+```
+
+These calls will not take effect until the next time you run a paragraph.
+
+In another paragraph, enter
+
+```python
+%pyspark
+print "This code should be entered into the paragraph by the user!"
+```
+
+The output should be:
+
+```
+This code should be executed before the paragraph code!
+This code should be entered into the paragraph by the user!
+This code should be executed after the paragraph code!
+```
+
+If you ever need to know the hook code, use `z.getHook()`:
+
+```python
+%pyspark
+print z.getHook("post_exec")
+
+print 'This code should be executed after the paragraph code!'
+```
+Any call to `z.registerHook()` will automatically overwrite what was previously registered.
+To completely unregister a hook event, use `z.unregisterHook(eventCode)`.
+Currently only `"post_exec"` and `"pre_exec"` are valid event codes for the Zeppelin Hook Registry system.
+
+Finally, the hook registry is internally shared by other interpreters in the same group.
+This allows hook code for one interpreter REPL to be set by another, as follows:
+
+```scala
+%spark
+z.unregisterHook("post_exec", "pyspark")
+```
+
+The API is identical for both the spark (scala) and pyspark (python) implementations.
+
+### Caveats
+Calls to `z.registerHook("pre_exec", ...)` should be made with care. If there are errors in your specified hook code, this will cause the interpreter REPL to become unable to execute any code past the pre-execute stage, making it impossible for direct calls to `z.unregisterHook()` to take effect. Current workarounds include calling `z.unregisterHook()` from a different interpreter REPL in the same interpreter group (see above) or manually restarting the interpreter group in the UI.
diff --git a/docs/manual/interpreterinstallation.md b/docs/manual/interpreterinstallation.md
index cfd58105a94..5825d1d6297 100644
--- a/docs/manual/interpreterinstallation.md
+++ b/docs/manual/interpreterinstallation.md
@@ -1,7 +1,7 @@
 ---
 layout: page
-title: "Interpreter Installation"
-description: ""
+title: "Interpreter Installation in Netinst Binary Package"
+description: "Apache Zeppelin provides an Interpreter Installation mechanism for those who downloaded the Zeppelin netinst binary package, or just want to install other 3rd party interpreters."
 group: manual
---
+{% include JB/setup %}
+
+## Run zeppelin interpreter process as web front end user
+
+ * Enable shiro auth in shiro.ini
+
+```
+[users]
+user1 = password1, role1
+user2 = password2, role2
+```
+
+ * Enable password-less ssh for the user you want to impersonate (say user1).
+
+```
+adduser user1
+# ssh-keygen (optional, if you have not already generated an ssh key)
+ssh user1@localhost mkdir -p .ssh
+cat ~/.ssh/id_rsa.pub | ssh user1@localhost 'cat >> .ssh/authorized_keys'
+```
+
+* Alternatively, instead of password-less ssh, you can override `ZEPPELIN_IMPERSONATE_CMD` in zeppelin-env.sh
+
+```
+export ZEPPELIN_IMPERSONATE_CMD='sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
+```
+
+ * Start zeppelin server.
+
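+For example, from the Zeppelin installation directory (the exact path is an assumption for a default install):
+
+```bash
+# launch the Zeppelin server as a background daemon
+bin/zeppelin-daemon.sh start
+```
+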
+ Screenshot
+
+ * Go to the interpreter setting page, and enable "User Impersonate" in any of the interpreters (in this example, the shell interpreter)
+
+ * Test with a simple paragraph
+
+```
+%sh
+whoami
+```
+
+
+Note that enabling the "User Impersonate" option will make the Spark interpreter use the `--proxy-user` option with the current user by default. If you want to disable the `--proxy-user` option, refer to the `ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER` variable in `conf/zeppelin-env.sh`
diff --git a/docs/pleasecontribute.md b/docs/pleasecontribute.md
index 063b48f2bee..746b39dc87c 100644
--- a/docs/pleasecontribute.md
+++ b/docs/pleasecontribute.md
@@ -1,8 +1,8 @@
 ---
 layout: page
-title: "Please contribute"
-description: ""
-group: development
+title:
+description:
+group:
 ---
+{% include JB/setup %}
+
 # Explore Apache Zeppelin UI
 ## Main home
-The first time you connect to Zeppelin, you'll land at the main page similar to the below screen capture.
+The first time you connect to Zeppelin ([default installations start on http://localhost:8080](http://localhost:8080/)), you'll land on a main page similar to the screen capture below.
diff --git a/docs/quickstart/install_with_flink_and_spark_cluster.md b/docs/quickstart/install_with_flink_and_spark_cluster.md
new file mode 100644
index 00000000000..89d6d6edc64
--- /dev/null
+++ b/docs/quickstart/install_with_flink_and_spark_cluster.md
@@ -0,0 +1,421 @@
+---
+layout: page
+title: "Install Zeppelin with Flink and Spark in cluster mode"
+description: "Tutorial is valid for Spark 1.6.x and Flink 1.1.2"
+group: tutorial
+---
+
+
+{% include JB/setup %}
+
+# Install with flink and spark cluster
+
+
+This tutorial is extremely entry-level. It assumes no prior knowledge of Linux, git, or other tools. If you carefully type what I tell you when I tell you, you should be able to get Zeppelin running.
+
+## Installing Zeppelin with Flink and Spark in cluster mode
+
+This tutorial assumes the user has a machine (real or [virtual](https://www.virtualbox.org/wiki/Downloads)) with a fresh, minimal installation of [Ubuntu 14.04.3 Server](http://www.ubuntu.com/download/server).
+
+**Note:** On the size requirements of the Virtual Machine, some users reported trouble when using the default virtual machine sizes, specifically that the hard drive needed to be at least 16GB; other users did not have this issue.
+
+There are many good tutorials on how to install Ubuntu Server on a virtual box; [here is one of them](http://ilearnstack.com/2013/04/13/setting-ubuntu-vm-in-virtualbox/).
+
+### Required Programs
+
+Assuming the minimal install, there are several programs that we will need to install before Zeppelin, Flink, and Spark.
+
+- git
+- openssh-server
+- OpenJDK 7
+- Maven 3.1+
+
+For git, openssh-server, and OpenJDK 7 we will be using the apt package manager.
+
+##### git
+From the command prompt:
+
+```
+sudo apt-get install git
+```
+
+##### openssh-server
+
+```
+sudo apt-get install openssh-server
+```
+
+##### OpenJDK 7
+
+```
+sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib
+```
+*A note for those using Ubuntu 16.04*: To install `openjdk-7` on Ubuntu 16.04, one must add a repository. [Source](http://askubuntu.com/questions/761127/ubuntu-16-04-and-openjdk-7)
+
+``` bash
+sudo add-apt-repository ppa:openjdk-r/ppa
+sudo apt-get update
+sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib
+```
+
+##### Maven 3.1+
+Zeppelin requires maven version 3.x. The version available in the repositories at the time of writing is 2.x, so maven must be installed manually.
+
+Purge any existing versions of maven.
+
+```
+sudo apt-get purge maven maven2
+```
+
+Download the maven 3.3.9 binary.
+
+```
+wget "http://www.us.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz"
+```
+
+Unarchive the binary and move to the `/usr/local` directory.
+
+```
+tar -zxvf apache-maven-3.3.9-bin.tar.gz
+sudo mv ./apache-maven-3.3.9 /usr/local
+```
+
+Create symbolic links in `/usr/bin`.
+
+```
+sudo ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/bin/mvn
+```
+
+### Installing Zeppelin
+This provides a quick overview of Zeppelin installation from source; however, the reader is encouraged to review the [Zeppelin Installation Guide](../install/install.html).
+
+From the command prompt:
+Clone Zeppelin.
+
+```
+git clone https://github.com/apache/zeppelin.git
+```
+
+Enter the Zeppelin root directory.
+
+```
+cd zeppelin
+```
+
+Package Zeppelin.
+
+```
+mvn clean package -DskipTests -Pspark-1.6 -Dflink.version=1.1.3 -Pscala-2.10
+```
+
+`-DskipTests` skips the build tests; you're not developing (yet), so you don't need to run tests, and the cloned version *should* build.
+
+`-Pspark-1.6` tells maven to build a Zeppelin with Spark 1.6. This is important because Zeppelin has its own Spark interpreter and the versions must be the same.
+
+`-Dflink.version=1.1.3` tells maven specifically to build Zeppelin with Flink version 1.1.3.
+
+`-Pscala-2.10` tells maven to build with Scala v2.10.
+
+
+**Note:** You can build against any version of Spark that has a Zeppelin build profile available. The key is to make sure you check out the matching version of Spark to build.
+At the time of this writing, Spark 1.6 was the most recent Spark version available.
+
+**Note:** On build failures. Having installed Zeppelin close to 30 times now, I will tell you that sometimes the build fails for seemingly no reason.
+As long as you didn't edit any code, it is unlikely the build is failing because of something you did. What does tend to happen is that some dependency maven is trying to download is unreachable. If your build fails on this step, here are some tips:
+
+- Don't get discouraged.
+- Scroll up and read through the logs. There will be clues there.
+- Retry (that is, run the `mvn clean package -DskipTests -Pspark-1.6` command again)
+- If there were clues that a dependency couldn't be downloaded, wait a few hours or even days and retry again. When compiling, open source software tries to download all of the dependencies it needs; if a server is offline there is nothing you can do but wait for it to come back.
+- Make sure you followed all of the steps carefully.
+- Ask the community to help you. Go [here](http://zeppelin.apache.org/community.html) and join the user mailing list. People are there to help you. Make sure to copy and paste the build output (everything that happened in the console) and include that in your message.
+
+
+Start the Zeppelin daemon.
+
+```
+bin/zeppelin-daemon.sh start
+```
+
+Use `ifconfig` to determine the host machine's IP address. If you are not familiar with how to do this, a fairly comprehensive post can be found [here](http://www.cyberciti.biz/faq/how-to-find-out-the-ip-address-assigned-to-eth0-and-display-ip-only/).
+
+Open a web-browser on a machine connected to the same network as the host (or in the host operating system if using a virtual machine). Navigate to http://`yourip`:8080, where `yourip` is the IP address you found in `ifconfig`.
+
+See the [Zeppelin tutorial](../tutorial/tutorial.html) for basic Zeppelin usage. It is also advised that you take a moment to check out the tutorial notebook that is included with each Zeppelin install, and to familiarize yourself with basic notebook functionality.
+
+##### Flink Test
+Create a new notebook named "Flink Test" and copy and paste the following code.
+
+
+```scala
+
+%flink // let Zeppelin know what interpreter to use.
+
+val text = benv.fromElements("In the time of chimpanzees, I was a monkey", // some lines of text to analyze
+"Butane in my veins and I'm out to cut the junkie",
+"With the plastic eyeballs, spray paint the vegetables",
+"Dog food stalls with the beefcake pantyhose",
+"Kill the headlights and put it in neutral",
+"Stock car flamin' with a loser in the cruise control",
+"Baby's in Reno with the Vitamin D",
+"Got a couple of couches, sleep on the love seat",
+"Someone came in sayin' I'm insane to complain",
+"About a shotgun wedding and a stain on my shirt",
+"Don't believe everything that you breathe",
+"You get a parking violation and a maggot on your sleeve",
+"So shave your face with some mace in the dark",
+"Savin' all your food stamps and burnin' down the trailer park",
+"Yo, cut it")
+
+/* The meat and potatoes:
+   this tells Flink to iterate through the elements, in this case strings,
+   transform the string to lower case and split the string at white space into individual words
+   then finally aggregate the occurrence of each word.
+
+   This creates the count variable, which is a list of tuples of the form (word, occurrences)
+*/
+val counts = text.flatMap{ _.toLowerCase.split("\\W+") }.map { (_,1) }.groupBy(0).sum(1)
+
+counts.collect().foreach(println(_)) // execute the script and print each element in the counts list
+
+```
+
+Run the code to make sure the built-in Zeppelin Flink interpreter is working properly.
+
+##### Spark Test
+Create a new notebook named "Spark Test" and copy and paste the following code.
+
+```scala
+%spark // let Zeppelin know what interpreter to use.
+
+val text = sc.parallelize(List("In the time of chimpanzees, I was a monkey", // some lines of text to analyze
+"Butane in my veins and I'm out to cut the junkie",
+"With the plastic eyeballs, spray paint the vegetables",
+"Dog food stalls with the beefcake pantyhose",
+"Kill the headlights and put it in neutral",
+"Stock car flamin' with a loser in the cruise control",
+"Baby's in Reno with the Vitamin D",
+"Got a couple of couches, sleep on the love seat",
+"Someone came in sayin' I'm insane to complain",
+"About a shotgun wedding and a stain on my shirt",
+"Don't believe everything that you breathe",
+"You get a parking violation and a maggot on your sleeve",
+"So shave your face with some mace in the dark",
+"Savin' all your food stamps and burnin' down the trailer park",
+"Yo, cut it"))
+
+
+/* The meat and potatoes:
+   this tells spark to iterate through the elements, in this case strings,
+   transform the string to lower case and split the string at white space into individual words
+   then finally aggregate the occurrence of each word.
+
+   This creates the count variable, which is a list of tuples of the form (word, occurrences)
+*/
+val counts = text.flatMap { _.toLowerCase.split("\\W+") }
+                 .map { (_,1) }
+                 .reduceByKey(_ + _)
+
+counts.collect().foreach(println(_)) // execute the script and print each element in the counts list
+```
+
+Run the code to make sure the built-in Zeppelin Spark interpreter is working properly.
+
+Finally, stop the Zeppelin daemon. From the command prompt run:
+
+```
+bin/zeppelin-daemon.sh stop
+```
+
+### Installing Clusters
+
+##### Flink Cluster
+
+###### Download Binaries
+
+Building from source is recommended where possible; for simplicity, in this tutorial we will download the Flink and Spark binaries.
+
+To download the Flink binary, use `wget`:
+
+```bash
+wget "http://mirror.cogentco.com/pub/apache/flink/flink-1.1.3/flink-1.1.3-bin-hadoop24-scala_2.10.tgz"
+tar -xzvf flink-1.1.3-bin-hadoop24-scala_2.10.tgz
+```
+
+This will download Flink 1.1.3, compatible with Hadoop 2.4. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `24` to your appropriate version.
+
+Start the Flink Cluster.
+
+```bash
+flink-1.1.3/bin/start-cluster.sh
+```
+
+###### Building From source
+
+If you wish to build Flink from source, the following will be instructive. Note that if you have downloaded and used the binary version this should be skipped. The changing nature of build tools and versions across platforms makes this section somewhat precarious. For example, Java8 and Maven 3.0.3 are recommended for building Flink, which are not recommended for Zeppelin at the time of writing. If the user wishes to attempt to build from source, this section will provide some reference. If errors are encountered, please contact the Apache Flink community.
+
+See the [Flink Installation guide](https://github.com/apache/flink/blob/master/README.md) for more detailed instructions.
+
+Return to the directory where you have been downloading; this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.1.3-rc2, and build.
+
+```
+cd $HOME
+git clone https://github.com/apache/flink.git
+cd flink
+git checkout release-1.1.3-rc2
+mvn clean install -DskipTests
+```
+
+Start the Flink Cluster in stand-alone mode.
+
+```
+build-target/bin/start-cluster.sh
+```
+
+###### Ensure the cluster is up
+
+In a browser, navigate to http://`yourip`:8082 to see the Flink Web-UI. Click on 'Task Managers' in the left navigation bar. Ensure there is at least one Task Manager present.
+
    ![alt text](../assets/themes/zeppelin/img/screenshots/flink-webui.png "The Flink Web-UI")
+
+
+If no task managers are present, restart the Flink cluster with the following commands:
+
+(if binaries)
+```
+flink-1.1.3/bin/stop-cluster.sh
+flink-1.1.3/bin/start-cluster.sh
+```
+
+
+(if built from source)
+```
+build-target/bin/stop-cluster.sh
+build-target/bin/start-cluster.sh
+```
+
+
+##### Spark 1.6 Cluster
+
+###### Download Binaries
+
+Building from source is recommended where possible; for simplicity, in this tutorial we will download the Flink and Spark binaries.
+
+To download the Spark binary, use `wget`:
+
+```bash
+wget "http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz"
+tar -xzvf spark-1.6.3-bin-hadoop2.6.tgz
+mv spark-1.6.3-bin-hadoop2.6 spark
+```
+
+This will download Spark 1.6.3, compatible with Hadoop 2.6. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `2.6` to your appropriate version.
+
+###### Building From source
+
+Spark is an extraordinarily large project, which takes considerable time to download and build. It is also prone to build failures for similar reasons listed in the Flink section. If the user wishes to attempt to build from source, this section will provide some reference. If errors are encountered, please contact the Apache Spark community.
+
+See the [Spark Installation](https://github.com/apache/spark/blob/master/README.md) guide for more detailed instructions.
+
+Return to the directory where you have been downloading; this tutorial assumes that is `$HOME`. Clone Spark, check out branch-1.6, and build.
+**Note:** Recall, we're only checking out 1.6 because it is the most recent Spark for which a Zeppelin profile exists at
+ the time of writing. You are free to check out other versions; just make sure you build Zeppelin against the correct version of Spark. However, if you use Spark 2.0, the word count example will need to be changed, as Spark 2.0 is not compatible with the following examples.
+
+
+```
+cd $HOME
+```
+
+Clone, check out, and build Spark version 1.6.x.
+
+```
+git clone https://github.com/apache/spark.git
+cd spark
+git checkout branch-1.6
+mvn clean package -DskipTests
+```
+
+###### Start the Spark cluster
+
+Return to the `$HOME` directory.
+
+```bash
+cd $HOME
+```
+
+Start the Spark cluster in stand-alone mode, specifying the webui-port as some port other than 8080 (the webui-port of Zeppelin).
+
+```
+spark/sbin/start-master.sh --webui-port 8082
+```
+**Note:** Why `--webui-port 8082`? There is a digression toward the end of this document that explains this.
+
+Open a browser and navigate to http://`yourip`:8082 to ensure the Spark master is running.
+
    ![alt text](../assets/themes/zeppelin/img/screenshots/spark-master-webui1.png "It should look like this...")
+
+Toward the top of the page there will be a *URL*: spark://`yourhost`:7077. Note this URL (the Spark master URI); it will be needed in subsequent steps.
+
+Start the slave using the URI from the Spark master WebUI:
+
+```
+spark/sbin/start-slave.sh spark://yourhostname:7077
+```
+
+Return to the root directory and start the Zeppelin daemon.
+
+```
+cd $HOME
+
+zeppelin/bin/zeppelin-daemon.sh start
+```
+
+##### Configure Interpreters
+
+Open a web browser and go to the Zeppelin web-ui at http://`yourip`:8080. Click on *anonymous* at the top right, which will open a drop-down menu, and select *Interpreters* to enter the interpreter configuration.
+
+In the Spark section, click the edit button in the top right corner to make the property values editable (it looks like a pencil).
+The only field that needs to be edited in the Spark interpreter is the master field. Change this value from `local[*]` to the URL you used to start the slave; mine was `spark://ubuntu:7077`.
+
+Click *Save* to update the parameters, and click *OK* when it asks you about restarting the interpreter.
+
+Now scroll down to the Flink section. Click the edit button and change the value of *host* from `local` to `localhost`. Click *Save* again.
+
+Reopen the examples and execute them again (i.e. you need to click the play button at the top of the screen, or the play button on the paragraph).
+
+You should be able to check the Flink and Spark webuis (at something like http://`yourip`:8081, http://`yourip`:8082, http://`yourip`:8083) and see that jobs have been run against the clusters.
+
+**Digression:** Sorry to be vague and use terms such as 'something like', but exactly what web-ui is at what port is going to depend on what order you started things.
+ What is really going on here is you are pointing your browser at specific ports, namely 8081, 8082, and 8083. Flink and Spark both want to put their web-ui on port 8080, but are
+ well behaved and will take the next port available. Since Zeppelin started first, it will get port 8080. When Flink starts (assuming you started Flink first), it will try to bind to
+ port 8080, see that it is already taken, and go to the next one available, hopefully 8081. Spark has a webui for the master and the slave, so when they start they will try to bind to 8080
+ (already taken by Zeppelin), then 8081 (already taken by Flink's webui), then 8082. If everything goes smoothly and you followed the directions precisely, the webuis should be 8081 and 8082.
+ It *is* possible to specify the port you want the webui to bind to at the command line, by passing the `--webui-port <port>` flag when you start Flink and Spark, where `<port>` is the port
+ you want to see that webui on. You can also set the default webui port of Spark and Flink (and Zeppelin) in the configuration files, but this is a tutorial for novices and slightly out of scope.
+
+
+
+### Next Steps
+
+Check out the [tutorial](./tutorial.html) for more cool things you can do with your new toy!
+
+[Join the community](http://zeppelin.apache.org/community.html), ask questions and contribute! Every little bit helps.
diff --git a/docs/quickstart/tutorial.md b/docs/quickstart/tutorial.md index 9333d1437c7..4947f3ce8a0 100644 --- a/docs/quickstart/tutorial.md +++ b/docs/quickstart/tutorial.md @@ -1,7 +1,7 @@ --- layout: page -title: "Tutorial" -description: "Tutorial is valid for Spark 1.3 and higher" +title: "Apache Zeppelin Tutorial" +description: "This tutorial page contains a short walk-through tutorial that uses Apache Spark backend. Please note that this tutorial is valid for Spark 1.3 and higher." group: quickstart --- +{% include JB/setup %} + # Zeppelin Tutorial
    diff --git a/docs/rest-api/rest-configuration.md b/docs/rest-api/rest-configuration.md index 4323b6d0040..47c65f11d16 100644 --- a/docs/rest-api/rest-configuration.md +++ b/docs/rest-api/rest-configuration.md @@ -1,7 +1,7 @@ --- layout: page -title: "Configuration REST API" -description: "" +title: "Apache Zeppelin Configuration REST API" +description: "This page contains Apache Zeppelin Configuration REST API information." group: rest-api --- +{% include JB/setup %} + +# Apache Zeppelin Credential REST API + +
+
+## Overview
+Apache Zeppelin provides several REST APIs for interaction and remote activation of zeppelin functionality.
+All REST APIs are available starting with the following endpoint `http://[zeppelin-server]:[zeppelin-port]/api`.
+Note that Apache Zeppelin REST APIs receive or return JSON objects; it is recommended that you install a JSON viewer such as [JSONView](https://chrome.google.com/webstore/detail/jsonview/chklaanhfefbnpoihckbnefhakgolnmc).
+
+If you work with Apache Zeppelin and find a need for an additional REST API, please [file an issue or send us an email](http://zeppelin.apache.org/community.html).
+
+
+## Credential REST API List
+
+### List Credential information
+
+Description: This `GET` method returns all key/value pairs of the credential information on the server.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/credential`
+Success code: 200
+Fail code: 500
+Sample JSON response:
    +{
    +  "status": "OK",
    +  "message": "",
    +  "body": {
    +    "userCredentials":{
    +      "entity1":{
    +        "username":"user1",
    +        "password":"password1"
    +      },
    +      "entity2":{
    +        "username":"user2",
    +        "password":"password2"
    +      }
    +    }
    +  }
    +}
    + +
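+A quick usage sketch with `curl` (the host and port `localhost:8080` are assumptions for a default install; if shiro authentication is enabled, an authenticated session is also required and is not shown here):
+
+```bash
+# list all credential entities saved on the server
+curl http://localhost:8080/api/credential
+```
+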
+### Create a Credential Information
+
+Description: This `PUT` method creates the credential information with new properties.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/credential/`
+Success code: 200
+Fail code: 500
+Sample JSON input:
    +{
    +  "entity": "e1",
    +  "username": "user",
    +  "password": "password"
    +}
    +        
    +
+Sample JSON response:
    +{
    +  "status": "OK"
    +}
    +        
    +
    + + +
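+A hedged `curl` sketch of the call above (host and port are the same assumptions as before; the payload mirrors the sample JSON input):
+
+```bash
+# create a credential entity named e1
+curl -X PUT -H "Content-Type: application/json" \
+  -d '{"entity": "e1", "username": "user", "password": "password"}' \
+  http://localhost:8080/api/credential
+```
+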
+### Delete all Credential Information
+
+Description: This `DELETE` method deletes the credential information.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/credential`
+Success code: 200
+Fail code: 500
+Sample JSON response: {"status":"OK"}
    + + +
+### Delete a Credential entity
+
+Description: This `DELETE` method deletes a given credential entity.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/credential/[entity]`
+Success code: 200
+Fail code: 500
+Sample JSON response: {"status":"OK"}
    + + +
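+For example, to delete the `e1` entity created above (assuming `localhost:8080`):
+
+```bash
+# delete a single credential entity by name
+curl -X DELETE http://localhost:8080/api/credential/e1
+```
+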
    + diff --git a/docs/rest-api/rest-helium.md b/docs/rest-api/rest-helium.md new file mode 100644 index 00000000000..b78b5766879 --- /dev/null +++ b/docs/rest-api/rest-helium.md @@ -0,0 +1,378 @@ +--- +layout: page +title: "Apache Zeppelin Helium REST API" +description: "This page contains Apache Zeppelin Helium REST API information." +group: rest-api +--- + +{% include JB/setup %} + +# Apache Zeppelin Helium REST API + +
+
+## Overview
+Apache Zeppelin provides several REST APIs for interaction and remote activation of zeppelin functionality.
+All REST APIs are available starting with the following endpoint `http://[zeppelin-server]:[zeppelin-port]/api`.
+Note that Apache Zeppelin REST APIs receive or return JSON objects; it is recommended that you install a JSON viewer such as [JSONView](https://chrome.google.com/webstore/detail/jsonview/chklaanhfefbnpoihckbnefhakgolnmc).
+
+If you work with Apache Zeppelin and find a need for an additional REST API, please [file an issue or send us an email](http://zeppelin.apache.org/community.html).
+
+## Helium REST API List
+
+### List of all available helium packages
+
+Description: This `GET` method returns all the available helium packages in configured registries.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/helium/all`
+Success code: 200
+Fail code: 500
+Sample JSON response:
    +{
    +  "status": "OK",
    +  "message": "",
    +  "body": {
    +    "zeppelin.clock": [
    +      {
    +        "registry": "local",
    +        "pkg": {
    +          "type": "APPLICATION",
    +          "name": "zeppelin.clock",
    +          "description": "Clock (example)",
    +          "artifact": "zeppelin-examples\/zeppelin-example-clock\/target\/zeppelin-example-clock-0.7.0-SNAPSHOT.jar",
    +          "className": "org.apache.zeppelin.example.app.clock.Clock",
    +          "resources": [
    +            [
    +              ":java.util.Date"
    +            ]
    +          ],
    +          "icon": "icon"
    +        },
    +        "enabled": false
    +      }
    +    ],
    +    "zeppelin-bubblechart": [
    +      {
    +        "registry": "local",
    +        "pkg": {
    +          "type": "VISUALIZATION",
    +          "name": "zeppelin-bubblechart",
    +          "description": "Animated bubble chart",
    +          "artifact": ".\/..\/helium\/zeppelin-bubble",
    +          "icon": "icon"
    +        },
    +        "enabled": true
    +      },
    +      {
    +        "registry": "local",
    +        "pkg": {
    +          "type": "VISUALIZATION",
    +          "name": "zeppelin-bubblechart",
    +          "description": "Animated bubble chart",
    +          "artifact": "zeppelin-bubblechart@0.0.2",
    +          "icon": "icon"
    +        },
    +        "enabled": false
    +      }
    +    ],
+    "zeppelin_horizontalbar": [
    +      {
    +        "registry": "local",
    +        "pkg": {
    +          "type": "VISUALIZATION",
    +          "name": "zeppelin_horizontalbar",
    +          "description": "Horizontal Bar chart (example)",
    +          "artifact": ".\/zeppelin-examples\/zeppelin-example-horizontalbar",
    +          "icon": "icon"
    +        },
    +        "enabled": true
    +      }
    +    ]
    +  }
    +}
    +        
    +
    + +
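+For example (assuming a default server on `localhost:8080`):
+
+```bash
+# list every helium package known to the configured registries
+curl http://localhost:8080/api/helium/all
+```
+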
+### Suggest Helium application
+
+Description: This `GET` method returns the suggested helium application for the paragraph.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/helium/suggest/[Note ID]/[Paragraph ID]`
+Success code: 200
+Fail code: 404 if the note or paragraph does not exist; 500
+Sample JSON response:
    +{
    +  "status": "OK",
    +  "message": "",
    +  "body": {
    +    "available": [
    +      {
    +        "registry": "local",
    +        "pkg": {
    +          "type": "APPLICATION",
    +          "name": "zeppelin.clock",
    +          "description": "Clock (example)",
    +          "artifact": "zeppelin-examples\/zeppelin-example-clock\/target\/zeppelin-example-clock-0.7.0-SNAPSHOT.jar",
    +          "className": "org.apache.zeppelin.example.app.clock.Clock",
    +          "resources": [
    +            [
    +              ":java.util.Date"
    +            ]
    +          ],
    +          "icon": "icon"
    +        },
    +        "enabled": true
    +      }
    +    ]
    +  }
    +}
    +        
    +
    + +
+
+### Load helium Application on a paragraph
+
+Description: This `GET` method returns a helium Application id on success.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/helium/load/[Note ID]/[Paragraph ID]`
+Success code: 200
+Fail code: 404 if the note or paragraph does not exist; 500 for any other errors
+Sample JSON response:
    +{
    +  "status": "OK",
    +  "message": "",
    +  "body": "app_2C5FYRZ1E-20170108-040449_2068241472zeppelin_clock"
    +}
    +        
    +
    + +
+### Load bundled visualization script
+
+Description: This `GET` method returns the bundled helium visualization javascript. When `refresh=true` (optional) is provided, Zeppelin rebuilds the bundle; otherwise it is served from the cache.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/helium/visualizations/load[?refresh=true]`
+Success code: 200; the response body is executable javascript
+Fail code: 200; the response body is an error message string starting with ERROR:
    +
    + +
+### Enable package
+
+Description: This `POST` method enables a helium package. It needs the artifact name in the input payload.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/helium/enable/[Package Name]`
+Success code: 200
+Fail code: 500
+Sample input:
    +zeppelin-examples/zeppelin-example-clock/target/zeppelin-example-clock-0.7.0-SNAPSHOT.jar
    +        
    +
+Sample JSON response:
    +{"status":"OK"}
    +        
    +
    + +
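+As a sketch, the clock example package from the listing above could be enabled like this (assuming `localhost:8080`; the artifact string is sent as the raw request body):
+
+```bash
+# enable the zeppelin.clock package by posting its artifact
+curl -X POST \
+  -d 'zeppelin-examples/zeppelin-example-clock/target/zeppelin-example-clock-0.7.0-SNAPSHOT.jar' \
+  http://localhost:8080/api/helium/enable/zeppelin.clock
+```
+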
+### Disable package
+
+Description: This `POST` method disables a helium package.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/helium/disable/[Package Name]`
+Success code: 200
+Fail code: 500
+Sample JSON response: {"status":"OK"}
    +
+
+### Get visualization display order
+
+Description: This `GET` method returns the display order of enabled visualization packages.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/helium/visualizationOrder`
+Success code: 200
+Fail code: 500
+Sample JSON response: {"status":"OK","body":["zeppelin_horizontalbar","zeppelin-bubblechart"]}
    + + +
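+For example (assuming `localhost:8080`):
+
+```bash
+# fetch the current display order of enabled visualization packages
+curl http://localhost:8080/api/helium/visualizationOrder
+```
+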
+
+### Set visualization display order
+
+Description: This `POST` method sets the visualization packages display order.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/helium/visualizationOrder`
+Success code: 200
+Fail code: 500
+Sample JSON input: ["zeppelin-bubblechart", "zeppelin_horizontalbar"]
+Sample JSON response: {"status":"OK"}
    \ No newline at end of file diff --git a/docs/rest-api/rest-interpreter.md b/docs/rest-api/rest-interpreter.md index bba7c9bd23c..d7dc6dd14a1 100644 --- a/docs/rest-api/rest-interpreter.md +++ b/docs/rest-api/rest-interpreter.md @@ -1,7 +1,7 @@ --- layout: page -title: "Interpreter REST API" -description: "" +title: "Apache Zeppelin Interpreter REST API" +description: "This page contains Apache Zeppelin Interpreter REST API information." group: rest-api --- +{% include JB/setup %} + +# Apache Zeppelin Notebook Repository API + +
+
+## Overview
+Apache Zeppelin provides several REST APIs for interaction and remote activation of zeppelin functionality.
+All REST APIs are available starting with the following endpoint `http://[zeppelin-server]:[zeppelin-port]/api`.
+Note that Apache Zeppelin REST APIs receive or return JSON objects; it is recommended that you install a JSON viewer such as [JSONView](https://chrome.google.com/webstore/detail/jsonview/chklaanhfefbnpoihckbnefhakgolnmc).
+
+If you work with Apache Zeppelin and find a need for an additional REST API, please [file an issue or send us an email](http://zeppelin.apache.org/community.html).
+
+## Notebook Repository REST API List
+
+### List all available notebook repositories
+
+Description: This `GET` method returns all the available notebook repositories.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/notebook-repositories`
+Success code: 200
+Fail code: 500
+Sample JSON response:
    +{
    +  "status": "OK",
    +  "message": "",
    +  "body": [
    +    {
    +      "name": "GitNotebookRepo",
    +      "className": "org.apache.zeppelin.notebook.repo.GitNotebookRepo",
    +      "settings": [
    +        {
    +          "type": "INPUT",
    +          "value": [],
    +          "selected": "ZEPPELIN_HOME/zeppelin/notebook/",
    +          "name": "Notebook Path"
    +        }
    +      ]
    +    }
    +  ]
    +}
    +        
    +
    + +
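+A quick sketch with `curl` (assuming a default server on `localhost:8080`):
+
+```bash
+# list the available notebook repositories
+curl http://localhost:8080/api/notebook-repositories
+```
+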
+
+### Reload a notebook repository
+
+Description: This `GET` method triggers reloading and broadcasting of the note list.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/notebook-repositories/reload`
+Success code: 200
+Fail code: 500
+Sample JSON response:
    +{
    +  "status": "OK",
    +  "message": ""
    +}
    +        
    +
    + +
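+For example (assuming `localhost:8080`):
+
+```bash
+# trigger a reload and broadcast of the note list
+curl http://localhost:8080/api/notebook-repositories/reload
+```
+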
+
+### Update a specific notebook repository
+
+Description: This `PUT` method updates a specific notebook repository.
+URL: `http://[zeppelin-server]:[zeppelin-port]/api/notebook-repositories`
+Success code: 200
+Fail code: 404 when the specified notebook repository doesn't exist; 406 for an invalid payload; 500 for any other errors
+Sample JSON input:
    +{
    +  "name":"org.apache.zeppelin.notebook.repo.GitNotebookRepo",
    +  "settings":{
    +    "Notebook Path":"/tmp/notebook/"
    +  }
    +}
    +        
    +
+Sample JSON response:
    +{
    +  "status": "OK",
    +  "message": "",
    +  "body": {
    +    "name": "GitNotebookRepo",
    +    "className": "org.apache.zeppelin.notebook.repo.GitNotebookRepo",
    +    "settings": [
    +      {
    +        "type": "INPUT",
    +        "value": [],
    +        "selected": "/tmp/notebook/",
    +        "name": "Notebook Path"
    +      }
    +    ]
    +  }
    +}
    +        
    +
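+A hedged `curl` sketch of the update call above (assuming `localhost:8080`; the payload mirrors the sample JSON input):
+
+```bash
+# point GitNotebookRepo at a different notebook path
+curl -X PUT -H "Content-Type: application/json" \
+  -d '{"name":"org.apache.zeppelin.notebook.repo.GitNotebookRepo","settings":{"Notebook Path":"/tmp/notebook/"}}' \
+  http://localhost:8080/api/notebook-repositories
+```
+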
    diff --git a/docs/rss.xml b/docs/rss.xml index 106b649c273..8c2a9dd9a8c 100644 --- a/docs/rss.xml +++ b/docs/rss.xml @@ -1,6 +1,6 @@ --- layout: nil -title : RSS Feed +title : --- diff --git a/docs/screenshots.md b/docs/screenshots.md index 7a389b74c71..e7af542942e 100644 --- a/docs/screenshots.md +++ b/docs/screenshots.md @@ -1,7 +1,7 @@ --- layout: page -title: "Screenshots" -description: "" +title: +description: --- +{% include JB/setup %} + + diff --git a/docs/search_data.json b/docs/search_data.json new file mode 100644 index 00000000000..3df19e958d6 --- /dev/null +++ b/docs/search_data.json @@ -0,0 +1,17 @@ +--- +layout: null +--- +{ + {% for page in site.pages %}{% if page.title != nil %} + + "{{ page.url | slugify }}": { + "title": "{{ page.title | xml_escape }}", + "content" : "{{page.content | strip_html | strip_newlines | escape | remove: "\"}}", + "url": " {{ page.url | xml_escape }}", + "group": "{{ page.group }}", + "excerpt": {{ page.description | strip_html | truncatewords: 40 | jsonify }} + } + {% unless forloop.last %},{% endunless %} + {% endif %} + {% endfor %} +} diff --git a/docs/security/authentication.md b/docs/security/authentication.md index 7ce160aa2b5..2723c56d307 100644 --- a/docs/security/authentication.md +++ b/docs/security/authentication.md @@ -1,7 +1,7 @@ --- layout: page title: "Authentication for NGINX" -description: "Authentication for NGINX" +description: "There are multiple ways to enable authentication in Apache Zeppelin. This page describes HTTP basic auth using NGINX." group: security --- +{% include JB/setup %} + # Authentication for NGINX
-Authentication is company-specific.
-One option is to use [Basic Access Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
+The [built-in authentication mechanism](./shiroauthentication.html) is the recommended way to authenticate. If you want to authenticate using NGINX and [HTTP basic auth](https://en.wikipedia.org/wiki/Basic_access_authentication) instead, please read this document.
 
 ## HTTP Basic Authentication using NGINX
@@ -83,7 +84,7 @@ This instruction based on Ubuntu 14.04 LTS but may work with other OS with few c
         }
         location /ws { # For websocket support
-            proxy_pass http://zeppelin;
+            proxy_pass http://zeppelin/ws;
             proxy_http_version 1.1;
             proxy_set_header Upgrade websocket;
             proxy_set_header Connection upgrade;
@@ -128,4 +129,4 @@ This instruction based on Ubuntu 14.04 LTS but may work with other OS with few c
 
 Another option is to have an authentication server that can verify user credentials in an LDAP server.
 If an incoming request to the Zeppelin server does not have a cookie with user information encrypted with the authentication server public key, the user is redirected to the authentication server. Once the user is verified, the authentication server redirects the browser to a specific URL in the Zeppelin server which sets the authentication cookie in the browser.
-The end result is that all requests to the Zeppelin web server have the authentication cookie which contains user and groups information.
\ No newline at end of file
+The end result is that all requests to the Zeppelin web server have the authentication cookie which contains user and groups information.
diff --git a/docs/security/datasource_authorization.md b/docs/security/datasource_authorization.md
new file mode 100644
index 00000000000..03165c8ac27
--- /dev/null
+++ b/docs/security/datasource_authorization.md
@@ -0,0 +1,64 @@
+---
+layout: page
+title: "Data Source Authorization in Apache Zeppelin"
+description: "Apache Zeppelin supports protected data sources. In case of a MySql database, every user can set up their own credentials to access it."
+group: security
+---
+
+{% include JB/setup %}
+
+# Data Source Authorization in Apache Zeppelin
+
+
+## Overview
+
+Data source authorization involves authenticating to the data source, like a MySql database, and letting it determine user permissions.
+Apache Zeppelin allows users to use their own credentials to authenticate with **Data Sources**.
+
+For example, let's assume you have an account in a Vertica database with credentials.
+You might want to use this account to create a JDBC connection instead of a shared account with all users who are defined in `conf/shiro.ini`.
+In this case, you can add your credential information to Apache Zeppelin and use it with the simple steps below.
+
+## How to save the credential information?
+You can add new credentials in the dropdown menu for your data source which can be passed to interpreters.
+
+**Entity** is the key that distinguishes each credential set. (We suggest that the convention for **Entity** is `[Interpreter Group].[Interpreter Name]`.)
+Please see [what is interpreter group](../manual/interpreters.html#what-is-interpreter-group) for detailed information.
+
+Type **Username & Password** for your own credentials, e.g. the MySql user & password of the JDBC interpreter.
+
+The credentials are saved per user as defined in `conf/shiro.ini`.
+If you didn't activate [shiro authentication in Apache Zeppelin](./shiroauthentication.html), your credential information will be saved as `anonymous`.
+All credential information can also be found in `conf/credentials.json`.
+
+#### JDBC interpreter
+You need to maintain per-user connection pools.
+The interpret method takes the user string as a parameter and executes the jdbc call using a connection in the user's connection pool.
+
+#### Presto
+You don't need a password if the Presto DB server runs backend code using HDFS authorization for the user.
+
+#### Vertica and Mysql
+You have to store the password information for users.
+
+## Please note
+As a first step of the data source authentication feature, [ZEPPELIN-828](https://issues.apache.org/jira/browse/ZEPPELIN-828) was proposed and implemented in Pull Request [#860](https://github.com/apache/zeppelin/pull/860).
+Currently, only customized 3rd party interpreters can use this feature. We are planning to apply this mechanism to [the community managed interpreters](../manual/interpreterinstallation.html#available-community-managed-interpreters) in the near future.
+Please keep track of [ZEPPELIN-1070](https://issues.apache.org/jira/browse/ZEPPELIN-1070).
diff --git a/docs/security/interpreter_authorization.md b/docs/security/interpreter_authorization.md
deleted file mode 100644
index 6e59e0718a9..00000000000
--- a/docs/security/interpreter_authorization.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-layout: page
-title: "Notebook Authorization"
-description: "Notebook Authorization"
-group: security
----
-
-# Interpreter and Data Source Authorization
-
    - -## Interpreter Authorization - -Interpreter authorization involves permissions like creating an interpreter and execution queries using it. - -## Data Source Authorization - -Data source authorization involves authenticating to the data source like a Mysql database and letting it determine user permissions. - -For the JDBC interpreter, we need to maintain per-user connection pools. -The interpret method takes the user string as parameter and executes the jdbc call using a connection in the user's connection pool. - -In case of Presto, we don't need password if the Presto DB server runs backend code using HDFS authorization for the user. -For databases like Vertica and Mysql we have to store password information for users. - -The Credentials tab in the navbar allows users to save credentials for data sources which are passed to interpreters. diff --git a/docs/security/notebook_authorization.md b/docs/security/notebook_authorization.md index 87885676ef3..a22785441eb 100644 --- a/docs/security/notebook_authorization.md +++ b/docs/security/notebook_authorization.md @@ -1,7 +1,7 @@ --- layout: page -title: "Notebook Authorization" -description: "Notebook Authorization" +title: "Notebook Authorization in Apache Zeppelin" +description: "This page will guide you how you can set the permission for Zeppelin notebooks. This document assumes that Apache Shiro authentication was set up." group: security --- +{% include JB/setup %} + # Zeppelin Notebook Authorization
 ## Overview
-We assume that there is an **Shiro Authentication** component that associates a user string and a set of group strings with every NotebookSocket.
+We assume that there is a **Shiro Authentication** component that associates a user string and a set of group strings with every NotebookSocket. If you haven't set up the authentication components yet, please check [Shiro authentication for Apache Zeppelin](./shiroauthentication.html) first.
 
 ## Authorization Setting
-You can set Zeppelin notebook permissions in each notebooks. Of course only **notebook owners** can change this configuration.
+You can set Zeppelin notebook permissions in each notebook. Of course, only **notebook owners** can change this configuration.
+Just click the **Lock icon** to open the permission setting page in your notebook.
 
-As you can see, each Zeppelin notebooks has 3 entities :
+As you can see, each Zeppelin notebook has 3 entities:
 
 * Owners ( users or groups )
 * Readers ( users or groups )
@@ -40,16 +42,35 @@ As you can see, each Zeppelin notebooks has 3 entities :
 
 Fill out the each forms with comma seperated **users** and **groups** configured in `conf/shiro.ini` file.
 If the form is empty (*), it means that any users can perform that operation.
 
-If someone who doesn't have **read** permission is trying to access the notebook or someone who doesn't have **write** permission is trying to edit the notebook, Zeppelin will ask to login or block the user.
+If someone who doesn't have **read** permission tries to access the notebook, or someone who doesn't have **write** permission tries to edit it, Zeppelin will ask them to log in or block the user.
+## Separate notebook workspaces (public vs. private)
+By default, the authorization rights allow other users to see the newly created note, meaning the workspace is `public`. This behavior is controllable and can be set through either the `ZEPPELIN_NOTEBOOK_PUBLIC` variable in `conf/zeppelin-env.sh`, or the `zeppelin.notebook.public` property in `conf/zeppelin-site.xml`. Thus, to make a newly created note appear only in your `private` workspace by default, you can either set `ZEPPELIN_NOTEBOOK_PUBLIC` to `false` in your `conf/zeppelin-env.sh` as follows:
+
+```
+export ZEPPELIN_NOTEBOOK_PUBLIC="false"
+```
+
+or set the `zeppelin.notebook.public` property to `false` in `conf/zeppelin-site.xml` as follows:
+
+```
+<property>
+  <name>zeppelin.notebook.public</name>
+  <value>false</value>
+  <description>Make notebook public by default when created, private otherwise</description>
+</property>
+```
+
+Behind the scenes, when you create a new note only the `owners` field is filled with the current user, leaving the `readers` and `writers` fields empty. All notes with at least one empty authorization field are considered to be in the `public` workspace. Thus, when setting `zeppelin.notebook.public` (or the corresponding `ZEPPELIN_NOTEBOOK_PUBLIC`) to false, newly created notes have the `readers` and `writers` fields filled with the current user, making the note appear in the `private` workspace.
+
 ## How it works
 In this section, we will explain the detail about how the notebook authorization works in backend side.
 
 ### NotebookServer
 The [NotebookServer](https://github.com/apache/zeppelin/blob/master/zeppelin-server/src/main/java/org/apache/zeppelin/socket/NotebookServer.java) classifies every notebook operations into three categories: **Read**, **Write**, **Manage**.
-Before executing a notebook operation, it checks if the user and the groups associated with the `NotebookSocket` have permissions.
+Before executing a notebook operation, it checks if the user and the groups associated with the `NotebookSocket` have permissions.
 For example, before executing a **Read** operation, it checks if the user and the groups have at least one entity that belongs to the **Reader** entities.
 
 ### Notebook REST API call
diff --git a/docs/security/shiroauthentication.md b/docs/security/shiroauthentication.md
index a7ddadd0c11..452600d3eb6 100644
--- a/docs/security/shiroauthentication.md
+++ b/docs/security/shiroauthentication.md
@@ -1,7 +1,7 @@
 ---
 layout: page
-title: "Shiro Security for Apache Zeppelin"
-description: ""
+title: "Apache Shiro Authentication for Apache Zeppelin"
+description: "Apache Shiro is a powerful and easy-to-use Java security framework that performs authentication, authorization, cryptography, and session management. This document explains step by step how Shiro can be used for Zeppelin notebook authentication."
 group: security
 ---
 {% include JB/setup %}
-# Shiro authentication for Apache Zeppelin
+# Apache Shiro authentication for Apache Zeppelin
@@ -31,20 +31,12 @@ When you connect to Apache Zeppelin, you will be asked to enter your credentials
 
 ## Security Setup
 You can setup **Zeppelin notebook authentication** in some simple steps.
 
-### 1. Secure the HTTP channel
-To secure the HTTP channel, you have to change both **anon** and **authc** settings in `conf/shiro.ini`. In here, **anon** means "the access is anonymous" and **authc** means "formed auth security".
-
-The default status of them is
-
-```
-/** = anon
-#/** = authc
-```
-Deactivate the line "/** = anon" and activate the line "/** = authc" in `conf/shiro.ini` file.
+### 1. Enable Shiro
+By default in `conf`, you will find `shiro.ini.template`; this file is used as an example, and it is strongly recommended
+to create a `shiro.ini` file by running the following command:
 
-```
-#/** = anon
-/** = authc
+```bash
+cp conf/shiro.ini.template conf/shiro.ini
 ```
 
 For the further information about `shiro.ini` file format, please refer to [Shiro Configuration](http://shiro.apache.org/configuration.html#Configuration-INISections).
@@ -66,22 +58,27 @@ Finally, you can login using one of the below **username/password** combinations
 ```
-admin = password1
-user1 = password2
-user2 = password3
+[users]
+
+admin = password1, admin
+user1 = password2, role1, role2
+user2 = password3, role3
+user3 = password4, role2
 ```
+You can set the roles for each user next to the password.
 
-### 5. Groups and permissions (optional)
-In case you want to leverage user groups and permissions, use one of the following configuration for LDAP or AD under `[main]` segment in `shiro.ini`
+## Groups and permissions (optional)
+In case you want to leverage user groups and permissions, use one of the following configurations for LDAP or AD under the `[main]` segment in `shiro.ini`.
 
 ```
-activeDirectoryRealm = org.apache.zeppelin.server.ActiveDirectoryGroupRealm
+activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm
 activeDirectoryRealm.systemUsername = userNameA
 activeDirectoryRealm.systemPassword = passwordA
 activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM
 activeDirectoryRealm.url = ldap://ldap.test.com:389
 activeDirectoryRealm.groupRolesMap = "CN=aGroupName,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"group1"
 activeDirectoryRealm.authorizationCachingEnabled = false
+activeDirectoryRealm.principalSuffix = @corp.company.net
 
 ldapRealm = org.apache.zeppelin.server.LdapGroupRealm
 # search base for ldap groups (only relevant for LdapGroupRealm):
@@ -90,7 +87,7 @@ ldapRealm.contextFactory.url = ldap://ldap.test.com:389
 ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
 ldapRealm.contextFactory.authenticationMechanism = SIMPLE
 ```
-
+
 also define roles/groups that you want to have in system, like below;
 
 ```
@@ -110,7 +107,7 @@ We also provide community custom Realms.
 
 ### Active Directory
 
 ```
-activeDirectoryRealm = org.apache.zeppelin.server.ActiveDirectoryGroupRealm
+activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm
 activeDirectoryRealm.systemUsername = userNameA
 activeDirectoryRealm.systemPassword = passwordA
 activeDirectoryRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/conf/zeppelin.jceks
@@ -118,6 +115,7 @@ activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM
 activeDirectoryRealm.url = ldap://ldap.test.com:389
 activeDirectoryRealm.groupRolesMap = "CN=aGroupName,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"group1"
 activeDirectoryRealm.authorizationCachingEnabled = false
+activeDirectoryRealm.principalSuffix = @corp.company.net
 ```
 
@@ -131,7 +129,7 @@ Change the following values in the Shiro.ini file, and uncomment the line:
 
 ### LDAP
 
 ```
-ldapRealm = org.apache.zeppelin.server.LdapGroupRealm
+ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm
 # search base for ldap groups (only relevant for LdapGroupRealm):
 ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM
 ldapRealm.contextFactory.url = ldap://ldap.test.com:389
@@ -139,6 +137,19 @@ ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
 ldapRealm.contextFactory.authenticationMechanism = SIMPLE
 ```
 
+### PAM
+[PAM](https://en.wikipedia.org/wiki/Pluggable_authentication_module) authentication support allows the reuse of existing authentication
+modules on the host where Zeppelin is running. On a typical system, modules are configured per service, for example sshd, passwd, etc., under `/etc/pam.d/`. You can
+either reuse one of these services or create your own for Zeppelin. Activating PAM authentication requires two parameters:
+ 1. realm: The Shiro realm being used
+ 2. service: The service configured under `/etc/pam.d/` to be used.
+The name here needs to be the same as the file name under `/etc/pam.d/`.
+
+```
+[main]
+ pamRealm=org.apache.zeppelin.realm.PamRealm
+ pamRealm.service=sshd
+```
+
 ### ZeppelinHub
 
 [ZeppelinHub](https://www.zeppelinhub.com) is a service that synchronize your Apache Zeppelin notebooks and enables you to collaborate easily.
 
@@ -152,11 +163,11 @@ zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
 securityManager.realms = $zeppelinHubRealm
 ```
 
-> Note: ZeppelinHub is not releated to apache Zeppelin project.
+> Note: ZeppelinHub is not related to the Apache Zeppelin project.
 
 ## Secure your Zeppelin information (optional)
-By default, anyone who defined in `[users]` can share **Interpreter Setting**, **Credential** and **Configuration** information in Apache Zeppelin.
-Sometimes you might want to hide these information for your use case.
+By default, anyone who is defined in `[users]` can share **Interpreter Setting**, **Credential** and **Configuration** information in Apache Zeppelin.
+Sometimes you might want to hide this information for your use case.
 
 Since Shiro provides **url-based security**, you can hide the information by commenting or uncommenting these below lines in `conf/shiro.ini`.
 
 ```
@@ -167,9 +178,14 @@ Since Shiro provides **url-based security**, you can hide the information by com
 
 /api/credential/** = authc, roles[admin]
 ```
 
-In this case, only who have `admin` role can see **Interpreter Setting**, **Credential** and **Configuration** information.
+In this case, only users who have the `admin` role can see **Interpreter Setting**, **Credential** and **Configuration** information.
 If you want to grant this permission to other users, you can change **roles[ ]** as you defined at `[users]` section.
-> **NOTE :** All of the above configurations are defined in the `conf/shiro.ini` file. This documentation is originally from [SECURITY-README.md](https://github.com/apache/zeppelin/blob/master/SECURITY-README.md).
+> **NOTE :** All of the above configurations are defined in the `conf/shiro.ini` file.
+
+
+## Other authentication methods
+
+- [HTTP Basic Authentication using NGINX](./authentication.html)
diff --git a/docs/sitemap.txt b/docs/sitemap.txt
index 360fa221c90..bda4c1b4b94 100644
--- a/docs/sitemap.txt
+++ b/docs/sitemap.txt
@@ -1,6 +1,6 @@
 ---
 # Remember to set production_url in your _config.yml file!
-title : Sitemap
+title :
 ---
 {% for page in site.pages %}
 {{site.production_url}}{{ page.url }}{% endfor %}
diff --git a/docs/storage/storage.md b/docs/storage/storage.md
index 19ddd95af82..c99b135f9fb 100644
--- a/docs/storage/storage.md
+++ b/docs/storage/storage.md
@@ -1,7 +1,7 @@
 ---
 layout: page
-title: "Storage"
-description: "Notebook Storage option for Zeppelin"
+title: "Notebook Storage for Apache Zeppelin"
+description: "Apache Zeppelin has a pluggable notebook storage mechanism controlled by zeppelin.notebook.storage configuration option with multiple implementations."
 group: storage
 ---
+{% include JB/setup %}
+
 # Notebook storage options for Apache Zeppelin
@@ -26,10 +28,12 @@ limitations under the License.
 Apache Zeppelin has a pluggable notebook storage mechanism controlled by `zeppelin.notebook.storage` configuration option with multiple implementations.
 There are few notebook storage systems available for a use out of the box:
 
- * (default) all notes are saved in the notebook folder in your local File System - `VFSNotebookRepo`
- * use local file system and version it using local Git repository - `GitNotebookRepo`
+ * (default) use local file system and version it using local Git repository - `GitNotebookRepo`
+ * all notes are saved in the notebook folder in your local File System - `VFSNotebookRepo`
 * storage using Amazon S3 service - `S3NotebookRepo`
 * storage using Azure service - `AzureNotebookRepo`
+ * storage using MongoDB - `MongoNotebookRepo`
+ * storage using HDFS - `HdfsNotebookRepo`
 
 Multiple storage systems can be used at the same time by providing a comma-separated list of the class-names in the configuration.
 By default, only first two of them will be automatically kept in sync by Zeppelin.
@@ -98,13 +102,13 @@ Uncomment the next property for use S3NotebookRepo class:
 ```
 
-Comment out the next property to disable local notebook storage (the default):
+Comment out the next property to disable local git notebook storage (the default):
 
 ```
 <property>
   <name>zeppelin.notebook.storage</name>
-  <value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</value>
-  <description>notebook persistence layer implementation</description>
+  <value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value>
+  <description>versioned notebook persistence layer implementation</description>
 </property>
 ```
 
@@ -128,6 +132,23 @@ Or using the following setting in **zeppelin-site.xml**:
 ```
 
+In order to set a custom KMS key region, set the following environment variable in the file **zeppelin-env.sh**:
+
+```
+export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION=kms-key-region
+```
+
+Or using the following setting in **zeppelin-site.xml**:
+
+```
+<property>
+  <name>zeppelin.notebook.s3.kmsKeyRegion</name>
+  <value>target-region</value>
+  <description>AWS KMS key region in your AWS account</description>
+</property>
+```
+The format of `target-region` is described in more detail [here](http://docs.aws.amazon.com/general/latest/gr/rande.html#kms_region) in the second `Region` column (e.g. `us-east-1`).
+
 #### Custom Encryption Materials Provider class
 
 You may use a custom [``EncryptionMaterialsProvider``](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/model/EncryptionMaterialsProvider.html) class as long as it is available in the classpath and able to initialize itself from system properties or another mechanism. To use this, set the following environment variable in the file **zeppelin-env.sh**:
@@ -146,8 +167,26 @@ Or using the following setting in **zeppelin-site.xml**:
   <description>Custom encryption materials provider used to encrypt notebook data in S3</description>
 </property>
 ```
 
+#### Enable server-side encryption
+
+To request server-side encryption of notebooks, set the following environment variable in the file **zeppelin-env.sh**:
+
+```
+export ZEPPELIN_NOTEBOOK_S3_SSE=true
+```
+
+Or using the following setting in **zeppelin-site.xml**:
+
+```
+<property>
+  <name>zeppelin.notebook.s3.sse</name>
+  <value>true</value>
+  <description>Server-side encryption enabled for notebooks</description>
+</property>
+```
+
-## Notebook Storage in Azure 
+## Notebook Storage in Azure
 
 Using `AzureNotebookRepo` you can connect your Zeppelin with your Azure account for notebook storage.
 
@@ -172,8 +211,8 @@ Secondly, you can initialize `AzureNotebookRepo` class in the file **zeppelin-site.xml**:
 ```
 <property>
   <name>zeppelin.notebook.storage</name>
-  <value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</value>
-  <description>notebook persistence layer implementation</description>
+  <value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value>
+  <description>versioned notebook persistence layer implementation</description>
 </property>
 ```
@@ -187,12 +226,12 @@ and commenting out:
 ```
 
-In case you want to use simultaneously your local storage with Azure storage use the following property instead:
+In case you want to use your local git storage simultaneously with Azure storage, use the following property instead:
 
 ```
 <property>
   <name>zeppelin.notebook.storage</name>
-  <value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo, apache.zeppelin.notebook.repo.AzureNotebookRepo</value>
+  <value>org.apache.zeppelin.notebook.repo.GitNotebookRepo, org.apache.zeppelin.notebook.repo.AzureNotebookRepo</value>
   <description>notebook persistence layer implementation</description>
 </property>
 ```
@@ -208,6 +247,31 @@ Optionally, you can specify Azure folder structure name in the file **zeppelin-site.xml**:
 ```
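+
+As a quick reference, a minimal sketch of the matching **zeppelin-env.sh** setup follows. The variable names as assumed here follow Zeppelin's `ZEPPELIN_NOTEBOOK_*` convention for the Azure properties above; the connection string and share name are placeholders you must replace with your own values:
+
+```sh
+export ZEPPELIN_NOTEBOOK_STORAGE=org.apache.zeppelin.notebook.repo.AzureNotebookRepo
+# placeholder credentials: use your own storage account name and key
+export ZEPPELIN_NOTEBOOK_AZURE_CONNECTION_STRING="DefaultEndpointsProtocol=https;AccountName=<accountName>;AccountKey=<accountKey>"
+export ZEPPELIN_NOTEBOOK_AZURE_SHARE=zeppelin
+```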
+
+## Notebook Storage in HDFS
+
+To enable your notebooks to be stored on HDFS, uncomment the next property in `zeppelin-site.xml` in order to use the HdfsNotebookRepo class:
+
+```
+<property>
+  <name>zeppelin.notebook.storage</name>
+  <value>org.apache.zeppelin.notebook.repo.HdfsNotebookRepo</value>
+  <description>notebook persistence layer implementation</description>
+</property>
+```
+
+and replace the notebook directory property below with an absolute HDFS location, as follows:
+
+```
+<property>
+  <name>zeppelin.notebook.dir</name>
+  <value>hdfs://localhost:9000/tmp/notebook</value>
+  <description>path or URI for notebook persistence</description>
+</property>
+```
+
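+As a quick sanity check (a sketch assuming a standard Hadoop client on the Zeppelin host and the example URI above), you can pre-create the notebook directory and verify that saved notes appear in it:
+
+```sh
+# create the notebook directory on HDFS before starting Zeppelin
+hdfs dfs -mkdir -p hdfs://localhost:9000/tmp/notebook
+# after saving a note in Zeppelin, each note shows up under this path
+hdfs dfs -ls hdfs://localhost:9000/tmp/notebook
+```
+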
+
+
 ## Storage in ZeppelinHub
 
 ZeppelinHub storage layer allows out-of-the-box connection of a Zeppelin instance with your ZeppelinHub account. First of all, you need to either comment out the following property in **zeppelin-site.xml**:
@@ -217,7 +281,7 @@ ZeppelinHub storage layer allows out of the box connection of Zeppelin instance
@@ -226,7 +290,7 @@ ZeppelinHub storage layer allows out of the box connection of Zeppelin instance
 or set the environment variable in the file **zeppelin-env.sh**:
 
 ```
-export ZEPPELIN_NOTEBOOK_STORAGE="org.apache.zeppelin.notebook.repo.VFSNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo"
+export ZEPPELIN_NOTEBOOK_STORAGE="org.apache.zeppelin.notebook.repo.GitNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo"
 ```
 
 Secondly, you need to set the environment variables in the file **zeppelin-env.sh**:
 
@@ -236,4 +300,73 @@ export ZEPPELINHUB_API_TOKEN = ZeppelinHub token
 export ZEPPELINHUB_API_ADDRESS = address of ZeppelinHub service (e.g. https://www.zeppelinhub.com)
 ```
 
-You can get more information on generating `token` and using authentication on the corresponding [help page](http://help.zeppelinhub.com/zeppelin_integration/#add-a-new-zeppelin-instance-and-generate-a-token).
\ No newline at end of file
+You can get more information on generating a `token` and using authentication on the corresponding [help page](http://help.zeppelinhub.com/zeppelin_integration/#add-a-new-zeppelin-instance-and-generate-a-token).
+
+
+## Notebook Storage in MongoDB
+Using `MongoNotebookRepo`, you can store your notebooks in [MongoDB](https://www.mongodb.com/).
+
+### Why MongoDB?
+* **[High Availability (HA)](https://en.wikipedia.org/wiki/High_availability)** via a [replica set](https://docs.mongodb.com/manual/reference/glossary/#term-replica-set)
+* Separation of storage from the server
+
+### How to use
+You can use MongoDB as notebook storage by editing `zeppelin-env.sh` or `zeppelin-site.xml`.
+
+#### (Method 1) by editing `zeppelin-env.sh`
+Add the line below to `$ZEPPELIN_HOME/conf/zeppelin-env.sh`:
+
+```sh
+export ZEPPELIN_NOTEBOOK_STORAGE=org.apache.zeppelin.notebook.repo.MongoNotebookRepo
+```
+
+> *NOTE:* The default MongoDB connection URI is `mongodb://localhost`
+
+#### (Method 2) by editing `zeppelin-site.xml`
+Or, **uncomment** the lines below in `$ZEPPELIN_HOME/conf/zeppelin-site.xml`:
+
+```xml
+<property>
+  <name>zeppelin.notebook.storage</name>
+  <value>org.apache.zeppelin.notebook.repo.MongoNotebookRepo</value>
+  <description>notebook persistence layer implementation</description>
+</property>
+```
+
+And **comment** the lines below:
+
+```xml
+<property>
+  <name>zeppelin.notebook.storage</name>
+  <value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value>
+  <description>versioned notebook persistence layer implementation</description>
+</property>
+```
+
+### Configurable Options
+
+You can configure the options below in `zeppelin-env.sh`:
+
+* `ZEPPELIN_NOTEBOOK_MONGO_URI` [MongoDB connection URI](https://docs.mongodb.com/manual/reference/connection-string/) used to connect to a MongoDB database server
+* `ZEPPELIN_NOTEBOOK_MONGO_DATABASE` Database name
+* `ZEPPELIN_NOTEBOOK_MONGO_COLLECTION` Collection name
+* `ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT` If `true`, import local notes (refer to the description below for details)
+
+Or, you can configure them in `zeppelin-site.xml`. The corresponding option names are as follows:
+
+* `zeppelin.notebook.mongo.uri`
+* `zeppelin.notebook.mongo.database`
+* `zeppelin.notebook.mongo.collection`
+* `zeppelin.notebook.mongo.autoimport`
+
+#### Example configurations in `zeppelin-env.sh`
+
+```sh
+export ZEPPELIN_NOTEBOOK_MONGO_URI=mongodb://db1.example.com:27017
+export ZEPPELIN_NOTEBOOK_MONGO_DATABASE=myfancy
+export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION=notebook
+export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT=true
+```
+
+#### Import your local notes automatically
+By setting `ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT` to `true` (default: `false`), you can import your local notes automatically when the Zeppelin daemon starts up. This feature is intended for easy migration from local file system storage to MongoDB storage. A note whose ID already exists in the collection will not be imported.
diff --git a/elasticsearch/pom.xml b/elasticsearch/pom.xml
index e5eefb9a911..6042a14eaee 100644
--- a/elasticsearch/pom.xml
+++ b/elasticsearch/pom.xml
@@ -22,20 +22,21 @@
   <parent>
     <artifactId>zeppelin</artifactId>
     <groupId>org.apache.zeppelin</groupId>
-    <version>0.6.2-SNAPSHOT</version>
+    <version>0.8.0-SNAPSHOT</version>
    <relativePath>..</relativePath>
   </parent>
 
-  <groupId>org.apache.zeppelin</groupId>
   <artifactId>zeppelin-elasticsearch</artifactId>
   <packaging>jar</packaging>
-  <version>0.6.2-SNAPSHOT</version>
+  <version>0.8.0-SNAPSHOT</version>
   <name>Zeppelin: Elasticsearch interpreter</name>
 
   <properties>
-    <elasticsearch.version>2.3.3</elasticsearch.version>
+    <elasticsearch.version>2.4.3</elasticsearch.version>
+    <httpasyncclient.version>4.0.2</httpasyncclient.version>
     <guava.version>18.0</guava.version>
     <json-flattener.version>0.1.6</json-flattener.version>
+    <unirest.version>1.4.9</unirest.version>
   </properties>
 
@@ -51,6 +52,12 @@
       <artifactId>elasticsearch</artifactId>
       <version>${elasticsearch.version}</version>
     </dependency>
+
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpasyncclient</artifactId>
+      <version>${httpasyncclient.version}</version>
+    </dependency>
+
     <dependency>
       <groupId>com.google.guava</groupId>
@@ -64,6 +71,12 @@
       <version>${json-flattener.version}</version>
     </dependency>
 
+    <dependency>
+      <groupId>com.mashape.unirest</groupId>
+      <artifactId>unirest-java</artifactId>
+      <version>${unirest.version}</version>
+    </dependency>
+
     <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>
@@ -80,8 +93,7 @@
         <artifactId>maven-enforcer-plugin</artifactId>
-        <version>1.3.1</version>
-
+
         <executions>
           <execution>
             <id>enforce</id>
             <phase>none</phase>
@@ -91,7 +103,6 @@
       <plugin>
         <artifactId>maven-dependency-plugin</artifactId>
-        <version>2.8</version>
         <executions>
           <execution>
             <id>copy-dependencies</id>
diff --git a/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java b/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java
index dfd27e59bca..33448df3db4 100644
--- a/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java
+++ b/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java
@@ -18,10 +18,10 @@
 package org.apache.zeppelin.elasticsearch;
 
 import java.io.IOException;
-import java.net.InetAddress;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
@@ -32,26 +32,21 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.zeppelin.completer.CompletionType;
+import org.apache.zeppelin.elasticsearch.action.ActionResponse;
+import org.apache.zeppelin.elasticsearch.action.AggWrapper;
+import org.apache.zeppelin.elasticsearch.action.HitWrapper;
+import org.apache.zeppelin.elasticsearch.client.ElasticsearchClient;
+import org.apache.zeppelin.elasticsearch.client.HttpBasedClient;
+import org.apache.zeppelin.elasticsearch.client.TransportBasedClient;
 import org.apache.zeppelin.interpreter.Interpreter;
 import org.apache.zeppelin.interpreter.InterpreterContext;
-import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
 import org.apache.zeppelin.interpreter.InterpreterResult;
 import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
-import org.elasticsearch.action.delete.DeleteResponse;
-import org.elasticsearch.action.get.GetResponse;
-import org.elasticsearch.action.index.IndexResponse;
-import org.elasticsearch.action.search.SearchAction;
-import org.elasticsearch.action.search.SearchRequestBuilder;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.Client;
-import org.elasticsearch.client.transport.TransportClient;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.common.transport.InetSocketTransportAddress;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentHelper;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.SearchHitField;
 import org.elasticsearch.search.aggregations.Aggregation;
 import org.elasticsearch.search.aggregations.Aggregations;
 import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
@@ -64,7 +59,7 @@
 import com.github.wnameless.json.flattener.JsonFlattener;
 import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
-import com.google.gson.JsonParseException;
+import com.google.gson.JsonObject;
 
 
 /**
@@ -75,82 +70,82 @@ public class ElasticsearchInterpreter extends Interpreter {
 
   private static Logger logger = LoggerFactory.getLogger(ElasticsearchInterpreter.class);
 
   private static final String HELP = "Elasticsearch interpreter:\n"
-    + "General format: ///