Closed

43 commits
54fdf0d
Separate spark-dependency into submodule
Leemoonsoo Aug 21, 2015
2052aa3
Load interpreter/spark/dep only when SPARK_HOME is undefined
Leemoonsoo Aug 21, 2015
c49be62
Add hadoop jar and spark jar from HADOOP_HOME, SPARK_HOME when they a…
Leemoonsoo Aug 21, 2015
b1d62a5
Add scala-library in test scope
Leemoonsoo Aug 21, 2015
1b7f951
Add dependency for compile and test
Leemoonsoo Aug 21, 2015
0f9598b
py4j version as a property
Leemoonsoo Aug 21, 2015
c3d96c1
Handle ZEPPELIN_CLASSPATH the proper way
Leemoonsoo Aug 21, 2015
9e812e7
Use reflection to avoid importing org.apache.spark.scheduler.Stage
Leemoonsoo Aug 22, 2015
f1e8789
update travis config
Leemoonsoo Aug 22, 2015
2a61ecd
Clear interpreter directory on mvn clean
Leemoonsoo Aug 22, 2015
2ca3d95
set SPARK_HOME
Leemoonsoo Aug 22, 2015
9d6b40f
Update .travis
Leemoonsoo Aug 22, 2015
df8f0ba
test more efficiently
Leemoonsoo Aug 22, 2015
2606c04
bringing travis-install.sh back
Leemoonsoo Aug 22, 2015
cf0a61e
rm -rf only interpreter directory instead of mvn clean
Leemoonsoo Aug 22, 2015
8de7add
trying to find why travis is not closing the test
Leemoonsoo Aug 22, 2015
797c0e2
enable 1.3.x test
Leemoonsoo Aug 22, 2015
6304180
enable 1.2.x test
Leemoonsoo Aug 22, 2015
cd4519c
try sys.stdout.write instead of print
Leemoonsoo Aug 22, 2015
a0150cf
not use travis-install for mvn test
Leemoonsoo Aug 22, 2015
5f8a734
test -> package
Leemoonsoo Aug 22, 2015
af7a925
add pyspark flag
Leemoonsoo Aug 22, 2015
5edb6fd
Use reflection to call addListener
Leemoonsoo Aug 22, 2015
62b8c45
Print all logs
Leemoonsoo Aug 22, 2015
0c28561
call listenerBus() using reflection
Leemoonsoo Aug 22, 2015
615c395
get correct method
Leemoonsoo Aug 22, 2015
5a17d9c
Call sqlContext.sql using reflection
Leemoonsoo Aug 22, 2015
3a88c77
Tests use %spark explicitly
Leemoonsoo Aug 22, 2015
bacfd93
Update readme
Leemoonsoo Aug 23, 2015
eb4ec09
fix reading spark-*.conf file
Leemoonsoo Aug 23, 2015
57b3f96
Add comment
Leemoonsoo Aug 25, 2015
654c378
use consistent, simpler expressions
Leemoonsoo Aug 31, 2015
703beae
Initial implementation of spark conf validator
Leemoonsoo Aug 22, 2015
89653e5
check spark.yarn.jar
Leemoonsoo Aug 22, 2015
22c04e8
Add unittest
Leemoonsoo Aug 22, 2015
a0ce737
Apply validator to SparkInterpreter, SparkSqlInterpreter, PySparkInte…
Leemoonsoo Aug 22, 2015
ca766f2
fix style
Leemoonsoo Aug 22, 2015
f989fda
pysparkpath -> pythonpath
Leemoonsoo Aug 22, 2015
725de6f
turn off zeppelin.spark.diagnosis by default
Leemoonsoo Aug 23, 2015
08d7227
Add apache header
Leemoonsoo Aug 23, 2015
02d86ce
Fix message
Leemoonsoo Aug 23, 2015
7cc2398
Get sparkHome in proper way
Leemoonsoo Aug 31, 2015
e00f0f1
"yarn" defaults to "yarn-client' mode
Leemoonsoo Sep 1, 2015
26 changes: 13 additions & 13 deletions .travis.yml
@@ -22,34 +22,34 @@ before_install:
- "sh -e /etc/init.d/xvfb start"

install:
- /bin/bash ./dev/travis/travis-install.sh `pwd` mvn package -DskipTests -Phadoop-2.3 -Ppyspark -B
- mvn package -DskipTests -Phadoop-2.3 -Ppyspark -B

before_script:
-

script:
# spark 1.4
- /bin/bash ./dev/travis/travis-install.sh `pwd` mvn package -Pbuild-distr -Phadoop-2.3 -Ppyspark -B
- mvn package -Pbuild-distr -Phadoop-2.3 -Ppyspark -B
- ./testing/startSparkCluster.sh 1.4.0 2.3
- /bin/bash ./dev/travis/travis-install.sh `pwd` mvn verify -Pusing-packaged-distr -Phadoop-2.3 -Ppyspark -B
- mvn verify -Pusing-packaged-distr -Phadoop-2.3 -Ppyspark -B
- ./testing/stopSparkCluster.sh 1.4.0 2.3
# spark 1.3
- /bin/bash ./dev/travis/travis-install.sh `pwd` mvn clean package -DskipTests -Pspark-1.3 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark'
- /bin/bash ./dev/travis/travis-install.sh `pwd` mvn package -Pbuild-distr -Pspark-1.3 -Phadoop-2.3 -B
- rm -rf `pwd`/interpreter/spark
- mvn package -DskipTests -Pspark-1.3 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark-dependencies,spark'
- ./testing/startSparkCluster.sh 1.3.1 2.3
- mvn verify -Pspark-1.3 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark'
- mvn package -Pspark-1.3 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server' -Dtest=org.apache.zeppelin.rest.*Test -DfailIfNoTests=false
- ./testing/stopSparkCluster.sh 1.3.1 2.3
# spark 1.2
- /bin/bash ./dev/travis/travis-install.sh `pwd` mvn clean package -DskipTests -Pspark-1.2 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark'
- /bin/bash ./dev/travis/travis-install.sh `pwd` mvn package -Pbuild-distr -Pspark-1.2 -Phadoop-2.3 -B
- rm -rf `pwd`/interpreter/spark
- mvn package -Pspark-1.2 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark-dependencies,spark'
- ./testing/startSparkCluster.sh 1.2.1 2.3
- mvn verify -Pspark-1.2 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark'
- mvn package -Pspark-1.2 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server' -Dtest=org.apache.zeppelin.rest.*Test -DfailIfNoTests=false
- ./testing/stopSparkCluster.sh 1.2.1 2.3
# spark 1.1
- /bin/bash ./dev/travis/travis-install.sh `pwd` mvn clean package -DskipTests -Pspark-1.1 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark'
- mvn package -Pbuild-distr -Pspark-1.1 -Phadoop-2.3 -B
- rm -rf `pwd`/interpreter/spark
- mvn package -Pspark-1.1 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark-dependencies,spark'
- ./testing/startSparkCluster.sh 1.1.1 2.3
- /bin/bash ./dev/travis/travis-install.sh `pwd` mvn verify -Pspark-1.1 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark'
- mvn package -Pspark-1.1 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server' -Dtest=org.apache.zeppelin.rest.*Test -DfailIfNoTests=false
- ./testing/stopSparkCluster.sh 1.1.1 2.3

after_failure:
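Each Spark version in the updated matrix follows the same self-contained cycle, with no travis-install.sh wrapper. A condensed sketch of one iteration (the 1.3 block shown; profile and cluster versions vary per block):

```
# Drop artifacts from the previous Spark build, then rebuild only the Spark-related modules
rm -rf `pwd`/interpreter/spark
mvn package -DskipTests -Pspark-1.3 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark-dependencies,spark'
# Run only the REST integration tests against a local standalone cluster
./testing/startSparkCluster.sh 1.3.1 2.3
mvn package -Pspark-1.3 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server' -Dtest=org.apache.zeppelin.rest.*Test -DfailIfNoTests=false
./testing/stopSparkCluster.sh 1.3.1 2.3
```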
51 changes: 33 additions & 18 deletions README.md
@@ -38,50 +38,52 @@ sudo apt-get install npm
### Build
If you want to build Zeppelin from the source, please first clone this repository. And then:
```
mvn clean package
mvn clean package -DskipTests
```
Build with specific version

Spark 1.1.x
```
mvn clean package -Pspark-1.1 -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests
```
Spark 1.2.x
Build with specific Spark version

Spark 1.4.x
```
mvn clean package -Pspark-1.2 -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests
mvn clean package -Pspark-1.4 -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests
```
Spark 1.3.x
```
mvn clean package -Pspark-1.3 -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests
```
Spark 1.4.x
Spark 1.2.x
```
mvn clean package -Pspark-1.4 -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests
mvn clean package -Pspark-1.2 -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests
```
Spark 1.1.x
```
mvn clean package -Pspark-1.1 -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests
```
CDH 5.X
```
mvn clean package -Pspark-1.2 -Dhadoop.version=2.5.0-cdh5.3.0 -Phadoop-2.4 -DskipTests
```
Yarn (Hadoop 2.2.x)
Yarn (Hadoop 2.7.x)
```
mvn clean package -Pspark-1.1 -Dhadoop.version=2.2.0 -Phadoop-2.2 -Pyarn -DskipTests
mvn clean package -Pspark-1.4 -Dspark.version=1.4.1 -Dhadoop.version=2.7.0 -Phadoop-2.6 -Pyarn -DskipTests
```
Yarn (Hadoop 2.3.x)
Yarn (Hadoop 2.6.x)
```
mvn clean package -Pspark-1.1 -Dhadoop.version=2.3.0 -Phadoop-2.3 -Pyarn -DskipTests
mvn clean package -Pspark-1.1 -Dhadoop.version=2.6.0 -Phadoop-2.6 -Pyarn -DskipTests
```
Yarn (Hadoop 2.4.x)
```
mvn clean package -Pspark-1.1 -Dhadoop.version=2.4.0 -Phadoop-2.4 -Pyarn -DskipTests
```
Yarn (Hadoop 2.6.x)
Yarn (Hadoop 2.3.x)
```
mvn clean package -Pspark-1.1 -Dhadoop.version=2.6.0 -Phadoop-2.6 -Pyarn -DskipTests
mvn clean package -Pspark-1.1 -Dhadoop.version=2.3.0 -Phadoop-2.3 -Pyarn -DskipTests
```
Yarn (Hadoop 2.7.x)
Yarn (Hadoop 2.2.x)
```
mvn clean package -Pspark-1.4 -Dspark.version=1.4.1 -Dhadoop.version=2.7.0 -Phadoop-2.6 -Pyarn -DskipTests
mvn clean package -Pspark-1.1 -Dhadoop.version=2.2.0 -Phadoop-2.2 -Pyarn -DskipTests
```

Ignite (1.1.0-incubating and later)
```
mvn clean package -Dignite.version=1.1.0-incubating -DskipTests
@@ -96,6 +98,19 @@ If you wish to configure Zeppelin option (like port number), configure the following files:
(You can copy ```./conf/zeppelin-env.sh.template``` into ```./conf/zeppelin-env.sh```.
Same for ```zeppelin-site.xml```.)


#### Setting SPARK_HOME and HADOOP_HOME

Without SPARK_HOME and HADOOP_HOME, Zeppelin uses the embedded Spark and Hadoop binaries selected by the mvn build options above.
If you want to use system-provided Spark and Hadoop instead, export SPARK_HOME and HADOOP_HOME in zeppelin-env.sh.
You can then use any supported version of Spark without rebuilding Zeppelin.

```
# ./conf/zeppelin-env.sh
export SPARK_HOME=...
export HADOOP_HOME=...
```

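A filled-in version of the snippet above, with hypothetical installation paths:

```
# ./conf/zeppelin-env.sh -- example values, adjust to your installation
export SPARK_HOME=/opt/spark-1.4.1-bin-hadoop2.6
export HADOOP_HOME=/opt/hadoop-2.6.0
```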
#### External cluster configuration
Mesos

14 changes: 1 addition & 13 deletions bin/common.sh
@@ -80,22 +80,10 @@ function addEachJarInDir(){

function addJarInDir(){
if [[ -d "${1}" ]]; then
export ZEPPELIN_CLASSPATH="${1}/*:${ZEPPELIN_CLASSPATH}"
ZEPPELIN_CLASSPATH="${1}/*:${ZEPPELIN_CLASSPATH}"
fi
}

if [[ ! -z "${SPARK_HOME}" ]] && [[ -d "${SPARK_HOME}" ]]; then
addJarInDir "${SPARK_HOME}"
fi

if [[ ! -z "${HADOOP_HOME}" ]] && [[ -d "${HADOOP_HOME}" ]]; then
addJarInDir "${HADOOP_HOME}"
fi

if [[ ! -z "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi

export ZEPPELIN_CLASSPATH

# Text encoding for
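For reference, the retained addJarInDir helper adds a single JVM wildcard entry rather than individual jars; a minimal sketch of its effect, using a hypothetical directory:

```
# Prepends "dir/*" to ZEPPELIN_CLASSPATH; the JVM expands the wildcard to
# every *.jar directly inside the directory (subdirectories are not included)
addJarInDir "/opt/extra-jars"
echo "${ZEPPELIN_CLASSPATH}"   # /opt/extra-jars/*:<previous entries>
```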
80 changes: 67 additions & 13 deletions bin/interpreter.sh
@@ -57,9 +57,6 @@ fi
addJarInDir "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib"
addJarInDir "${INTERPRETER_DIR}"

export SPARK_CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
CLASSPATH+=":${ZEPPELIN_CLASSPATH}"

HOSTNAME=$(hostname)
ZEPPELIN_SERVER=org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer

@@ -73,19 +70,76 @@ if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then
$(mkdir -p "${ZEPPELIN_LOG_DIR}")
fi

if [[ ! -z "${SPARK_HOME}" ]]; then
PYSPARKPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4j-0.8.2.1-src.zip"
else
PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
fi
# set spark related env variables
if [[ "${INTERPRETER_ID}" == "spark" ]]; then
# add Hadoop jars into classpath
if [[ -n "${HADOOP_HOME}" ]]; then
# Apache
addEachJarInDir "${HADOOP_HOME}/share"

if [[ x"" == x"${PYTHONPATH}" ]]; then
export PYTHONPATH="${PYSPARKPATH}"
else
export PYTHONPATH="${PYTHONPATH}:${PYSPARKPATH}"
# CDH
addJarInDir "${HADOOP_HOME}"
addJarInDir "${HADOOP_HOME}/lib"
fi

# autodetect HADOOP_CONF_DIR by heuristic
if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then
if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
elif [[ -d "/etc/hadoop/conf" ]]; then
export HADOOP_CONF_DIR="/etc/hadoop/conf"
fi
fi

if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi

# add Spark jars into classpath
if [[ -n "${SPARK_HOME}" ]]; then
addJarInDir "${SPARK_HOME}/lib"
PYSPARKPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4j-0.8.2.1-src.zip"
else
addJarInDir "${INTERPRETER_DIR}/dep"
PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
fi

if [[ x"" == x"${PYTHONPATH}" ]]; then
export PYTHONPATH="${PYSPARKPATH}"
else
export PYTHONPATH="${PYTHONPATH}:${PYSPARKPATH}"
fi


# autodetect SPARK_CONF_DIR
if [[ -n "${SPARK_HOME}" ]] && [[ -z "${SPARK_CONF_DIR}" ]]; then
if [[ -d "${SPARK_HOME}/conf" ]]; then
SPARK_CONF_DIR="${SPARK_HOME}/conf"
fi
fi

# read spark-*.conf if exists
if [[ -d "${SPARK_CONF_DIR}" ]]; then
ls ${SPARK_CONF_DIR}/spark-*.conf > /dev/null 2>&1
if [[ "$?" -eq 0 ]]; then
for file in ${SPARK_CONF_DIR}/spark-*.conf; do
while read -r line; do
echo "${line}" | grep -e "^spark[.]" > /dev/null
if [ "$?" -ne 0 ]; then
# skip lines that do not start with 'spark.'
continue;
fi
SPARK_CONF_KEY=`echo "${line}" | sed -e 's/\(^spark[^ ]*\)[ \t]*\(.*\)/\1/g'`
SPARK_CONF_VALUE=`echo "${line}" | sed -e 's/\(^spark[^ ]*\)[ \t]*\(.*\)/\2/g'`
export ZEPPELIN_JAVA_OPTS+=" -D${SPARK_CONF_KEY}=\"${SPARK_CONF_VALUE}\""
done < "${file}"
done
fi
fi
fi

unset PYSPARKPATH
export SPARK_CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
CLASSPATH+=":${ZEPPELIN_CLASSPATH}"

${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} -cp ${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} &
pid=$!
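The spark-*.conf loop in this hunk turns each spark.* property into a -D JVM option. A sketch of one pass through the loop, using a hypothetical property line:

```
line='spark.executor.memory   2g'
SPARK_CONF_KEY=`echo "${line}" | sed -e 's/\(^spark[^ ]*\)[ \t]*\(.*\)/\1/g'`     # spark.executor.memory
SPARK_CONF_VALUE=`echo "${line}" | sed -e 's/\(^spark[^ ]*\)[ \t]*\(.*\)/\2/g'`   # 2g
export ZEPPELIN_JAVA_OPTS+=" -D${SPARK_CONF_KEY}=\"${SPARK_CONF_VALUE}\""
# ZEPPELIN_JAVA_OPTS now ends with: -Dspark.executor.memory="2g"
```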
3 changes: 1 addition & 2 deletions dev/travis/save-logs.py
@@ -42,8 +42,7 @@ def main(file, cmd):
errcode = process.wait()
diff = datetime.now() - start
sys.stdout.write("\r%d seconds %d log lines"%(diff.seconds, count))
print
print cmd, "done", errcode
sys.stdout.write("\n" + str(cmd) + " done " + str(errcode) + "\n")
return errcode

if __name__ == "__main__":
1 change: 1 addition & 0 deletions pom.xml
@@ -86,6 +86,7 @@
<modules>
<module>zeppelin-interpreter</module>
<module>zeppelin-zengine</module>
<module>spark-dependencies</module>
<module>spark</module>
<module>markdown</module>
<module>angular</module>
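With spark-dependencies registered in the reactor, the new module can also be built on its own; a hypothetical invocation using Maven's also-make flag:

```
# Build only the new module, plus the reactor projects it depends on
mvn package -DskipTests -pl 'spark-dependencies' -am
```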