Commit b32cd8e

jerryshao authored and kiszk committed
[SPARK-2960][DEPLOY] Support executing Spark from symlinks (reopen)
This PR is based on the work of roji to support running Spark scripts from symlinks. Thanks for the great work, roji. Would you mind taking a look at this PR? Thanks a lot.

Distributions such as HDP normally expose the Spark executables as symlinks placed on the `PATH`, but the current Spark scripts do not recursively resolve the real path behind a symlink, so Spark fails to execute when invoked through one. This PR solves the issue by finding the absolute path from the symlink. Unlike the earlier attempt (apache/spark#2386), it does not use `readlink -f`, because `-f` is not supported on Mac; instead the path is resolved manually in a loop. I've tested on Mac and Linux (CentOS), and it looks fine.

This PR did not fix the scripts under the `sbin` folder; I'm not sure whether they need to be fixed as well. Please help to review; any comment is greatly appreciated.

Author: jerryshao <[email protected]>
Author: Shay Rojansky <[email protected]>

Closes #8669 from jerryshao/SPARK-2960.
1 parent 9c904b9 commit b32cd8e

31 files changed: +213 -183 lines changed
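The commit message describes the portable replacement for `readlink -f` only in prose. A rough bash sketch of that style of resolution loop (illustrative only, not the literal code merged in this commit) follows `$0` through any chain of symlinks one step at a time, which also works on macOS where `readlink -f` is unavailable:

# Illustrative sketch of resolving symlinks without `readlink -f`;
# the variable names are assumptions, not the exact code in this PR.
SOURCE="$0"
while [ -h "$SOURCE" ]; do
  # directory that contains the current link
  DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
  # resolve one level of indirection (plain readlink works on macOS)
  SOURCE="$(readlink "$SOURCE")"
  # a relative link target is interpreted relative to the link's directory
  [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
done
SPARK_HOME="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"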

bin/beeline

Lines changed: 5 additions & 3 deletions
@@ -23,8 +23,10 @@
 # Enter posix mode for bash
 set -o posix
 
-# Figure out where Spark is installed
-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+# Figure out if SPARK_HOME is set
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
 
 CLASS="org.apache.hive.beeline.BeeLine"
-exec "$FWDIR/bin/spark-class" $CLASS "$@"
+exec "${SPARK_HOME}/bin/spark-class" $CLASS "$@"

bin/load-spark-env.sh

Lines changed: 18 additions & 14 deletions
@@ -20,13 +20,17 @@
 # This script loads spark-env.sh if it exists, and ensures it is only loaded once.
 # spark-env.sh is loaded from SPARK_CONF_DIR if set, or within the current directory's
 # conf/ subdirectory.
-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+
+# Figure out where Spark is installed
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
 
 if [ -z "$SPARK_ENV_LOADED" ]; then
   export SPARK_ENV_LOADED=1
 
   # Returns the parent of the directory this script lives in.
-  parent_dir="$(cd "`dirname "$0"`"/..; pwd)"
+  parent_dir="${SPARK_HOME}"
 
   user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}"
 
@@ -42,18 +46,18 @@ fi
 
 if [ -z "$SPARK_SCALA_VERSION" ]; then
 
-    ASSEMBLY_DIR2="$FWDIR/assembly/target/scala-2.11"
-    ASSEMBLY_DIR1="$FWDIR/assembly/target/scala-2.10"
+  ASSEMBLY_DIR2="${SPARK_HOME}/assembly/target/scala-2.11"
+  ASSEMBLY_DIR1="${SPARK_HOME}/assembly/target/scala-2.10"
 
-    if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
-        echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2
-        echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.' 1>&2
-        exit 1
-    fi
+  if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
+    echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2
+    echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.' 1>&2
+    exit 1
+  fi
 
-    if [ -d "$ASSEMBLY_DIR2" ]; then
-        export SPARK_SCALA_VERSION="2.11"
-    else
-        export SPARK_SCALA_VERSION="2.10"
-    fi
+  if [ -d "$ASSEMBLY_DIR2" ]; then
+    export SPARK_SCALA_VERSION="2.11"
+  else
+    export SPARK_SCALA_VERSION="2.10"
+  fi
 fi

bin/pyspark

Lines changed: 8 additions & 6 deletions
@@ -17,9 +17,11 @@
 # limitations under the License.
 #
 
-export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
 
-source "$SPARK_HOME"/bin/load-spark-env.sh
+source "${SPARK_HOME}"/bin/load-spark-env.sh
 export _SPARK_CMD_USAGE="Usage: ./bin/pyspark [options]"
 
 # In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
@@ -64,12 +66,12 @@ fi
 export PYSPARK_PYTHON
 
 # Add the PySpark classes to the Python path:
-export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
-export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.9-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
-export PYTHONSTARTUP="$SPARK_HOME/python/pyspark/shell.py"
+export PYTHONSTARTUP="${SPARK_HOME}/python/pyspark/shell.py"
 
 # For pyspark tests
 if [[ -n "$SPARK_TESTING" ]]; then
@@ -82,4 +84,4 @@ fi
 
 export PYSPARK_DRIVER_PYTHON
 export PYSPARK_DRIVER_PYTHON_OPTS
-exec "$SPARK_HOME"/bin/spark-submit pyspark-shell-main --name "PySparkShell" "$@"
+exec "${SPARK_HOME}"/bin/spark-submit pyspark-shell-main --name "PySparkShell" "$@"

bin/run-example

Lines changed: 10 additions & 8 deletions
@@ -17,11 +17,13 @@
 # limitations under the License.
 #
 
-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
-export SPARK_HOME="$FWDIR"
-EXAMPLES_DIR="$FWDIR"/examples
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+EXAMPLES_DIR="${SPARK_HOME}"/examples
 
-. "$FWDIR"/bin/load-spark-env.sh
+. "${SPARK_HOME}"/bin/load-spark-env.sh
 
 if [ -n "$1" ]; then
   EXAMPLE_CLASS="$1"
@@ -34,8 +36,8 @@ else
   exit 1
 fi
 
-if [ -f "$FWDIR/RELEASE" ]; then
-  JAR_PATH="${FWDIR}/lib"
+if [ -f "${SPARK_HOME}/RELEASE" ]; then
+  JAR_PATH="${SPARK_HOME}/lib"
 else
   JAR_PATH="${EXAMPLES_DIR}/target/scala-${SPARK_SCALA_VERSION}"
 fi
@@ -44,7 +46,7 @@ JAR_COUNT=0
 
 for f in "${JAR_PATH}"/spark-examples-*hadoop*.jar; do
   if [[ ! -e "$f" ]]; then
-    echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+    echo "Failed to find Spark examples assembly in ${SPARK_HOME}/lib or ${SPARK_HOME}/examples/target" 1>&2
     echo "You need to build Spark before running this program" 1>&2
     exit 1
   fi
@@ -67,7 +69,7 @@ if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
   EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
 fi
 
-exec "$FWDIR"/bin/spark-submit \
+exec "${SPARK_HOME}"/bin/spark-submit \
   --master $EXAMPLE_MASTER \
   --class $EXAMPLE_CLASS \
   "$SPARK_EXAMPLES_JAR" \

bin/spark-class

Lines changed: 8 additions & 7 deletions
@@ -17,10 +17,11 @@
 # limitations under the License.
 #
 
-# Figure out where Spark is installed
-export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
 
-. "$SPARK_HOME"/bin/load-spark-env.sh
+. "${SPARK_HOME}"/bin/load-spark-env.sh
 
 # Find the java binary
 if [ -n "${JAVA_HOME}" ]; then
@@ -36,10 +37,10 @@ fi
 
 # Find assembly jar
 SPARK_ASSEMBLY_JAR=
-if [ -f "$SPARK_HOME/RELEASE" ]; then
-  ASSEMBLY_DIR="$SPARK_HOME/lib"
+if [ -f "${SPARK_HOME}/RELEASE" ]; then
+  ASSEMBLY_DIR="${SPARK_HOME}/lib"
 else
-  ASSEMBLY_DIR="$SPARK_HOME/assembly/target/scala-$SPARK_SCALA_VERSION"
+  ASSEMBLY_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
 fi
 
 GREP_OPTIONS=
@@ -65,7 +66,7 @@ LAUNCH_CLASSPATH="$SPARK_ASSEMBLY_JAR"
 
 # Add the launcher build dir to the classpath if requested.
 if [ -n "$SPARK_PREPEND_CLASSES" ]; then
-  LAUNCH_CLASSPATH="$SPARK_HOME/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
+  LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
 fi
 
 export _SPARK_ASSEMBLY="$SPARK_ASSEMBLY_JAR"

bin/spark-shell

Lines changed: 6 additions & 3 deletions
@@ -28,7 +28,10 @@ esac
 # Enter posix mode for bash
 set -o posix
 
-export FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
 export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]"
 
 # SPARK-4161: scala does not assume use of the java classpath,
@@ -47,11 +50,11 @@ function main() {
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    "$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
+    "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    "$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
+    "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
   fi
 }

bin/spark-sql

Lines changed: 5 additions & 2 deletions
@@ -17,6 +17,9 @@
 # limitations under the License.
 #
 
-export FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
 export _SPARK_CMD_USAGE="Usage: ./bin/spark-sql [options] [cli option]"
-exec "$FWDIR"/bin/spark-submit --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver "$@"
+exec "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver "$@"

bin/spark-submit

Lines changed: 4 additions & 2 deletions
@@ -17,9 +17,11 @@
 # limitations under the License.
 #
 
-SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
 
 # disable randomized hash for string in Python 3.3+
 export PYTHONHASHSEED=0
 
-exec "$SPARK_HOME"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
+exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"

bin/sparkR

Lines changed: 6 additions & 3 deletions
@@ -17,7 +17,10 @@
 # limitations under the License.
 #
 
-export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
-source "$SPARK_HOME"/bin/load-spark-env.sh
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+source "${SPARK_HOME}"/bin/load-spark-env.sh
 export _SPARK_CMD_USAGE="Usage: ./bin/sparkR [options]"
-exec "$SPARK_HOME"/bin/spark-submit sparkr-shell-main "$@"
+exec "${SPARK_HOME}"/bin/spark-submit sparkr-shell-main "$@"

sbin/slaves.sh

Lines changed: 5 additions & 4 deletions
@@ -36,10 +36,11 @@ if [ $# -le 0 ]; then
   exit 1
 fi
 
-sbin="`dirname "$0"`"
-sbin="`cd "$sbin"; pwd`"
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
 
-. "$sbin/spark-config.sh"
+. "${SPARK_HOME}/sbin/spark-config.sh"
 
 # If the slaves file is specified in the command line,
 # then it takes precedence over the definition in
@@ -65,7 +66,7 @@ then
   shift
 fi
 
-. "$SPARK_PREFIX/bin/load-spark-env.sh"
+. "${SPARK_HOME}/bin/load-spark-env.sh"
 
 if [ "$HOSTLIST" = "" ]; then
   if [ "$SPARK_SLAVES" = "" ]; then
