
Commit 3bbf3e7

Author: Marcelo Vanzin (committed)

Merge branch 'master' into SPARK-2933

Conflicts:
	yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
	yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
	yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala

2 parents: ff389ed + 8856c3d

126 files changed: +3744 −968 lines. (Large commit: only a subset of the changed files is shown below.)

README.md

Lines changed: 5 additions & 4 deletions
@@ -118,11 +118,10 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
 ## A Note About Thrift JDBC server and CLI for Spark SQL
 
 Spark SQL supports Thrift JDBC server and CLI.
-See sql-programming-guide.md for more information about those features.
-You can use those features by setting `-Phive-thriftserver` when building Spark as follows.
-
-    $ sbt/sbt -Phive-thriftserver assembly
+See sql-programming-guide.md for more information about using the JDBC server and CLI.
+You can use those features by setting `-Phive` when building Spark as follows.
 
+    $ sbt/sbt -Phive assembly
 
 ## Configuration
 
@@ -140,3 +139,5 @@ submitting any copyrighted material via pull request, email, or other means
 you agree to license the material under the project's open source license and
 warrant that you have the legal authority to do so.
 
+Please see [Contributing to Spark wiki page](https://cwiki.apache.org/SPARK/Contributing+to+Spark)
+for more information.
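For context, a minimal sketch of how the rebuilt features would be exercised end to end; the sbin/start-thriftserver.sh and bin/spark-sql entry points are assumed from the Spark layout of this era and are not part of this diff:

    # Build with Hive and Thrift JDBC server support (per the updated README)
    $ sbt/sbt -Phive assembly
    # Start the Thrift JDBC server (assumed entry point)
    $ ./sbin/start-thriftserver.sh
    # Or run the SQL CLI (assumed entry point)
    $ ./bin/spark-sql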

assembly/pom.xml

Lines changed: 18 additions & 5 deletions
@@ -43,6 +43,12 @@
   </properties>
 
   <dependencies>
+    <!-- Promote Guava to compile scope in this module so it's included while shading. -->
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <scope>compile</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -113,6 +119,18 @@
               <goal>shade</goal>
             </goals>
             <configuration>
+              <relocations>
+                <relocation>
+                  <pattern>com.google</pattern>
+                  <shadedPattern>org.spark-project.guava</shadedPattern>
+                  <includes>
+                    <include>com.google.common.**</include>
+                  </includes>
+                  <excludes>
+                    <exclude>com.google.common.base.Optional**</exclude>
+                  </excludes>
+                </relocation>
+              </relocations>
               <transformers>
                 <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                 <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
@@ -163,11 +181,6 @@
         <artifactId>spark-hive_${scala.binary.version}</artifactId>
         <version>${project.version}</version>
       </dependency>
-      </dependencies>
-    </profile>
-    <profile>
-      <id>hive-thriftserver</id>
-      <dependencies>
       <dependency>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
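The relocation above rewrites com.google.common classes into the org.spark-project.guava namespace inside the assembly jar, leaving only com.google.common.base.Optional* at its original coordinates for the public Java API. A hedged way to spot-check the result; the jar path is an assumption and varies with the build profile:

    # Relocated Guava classes should appear under the new namespace...
    $ unzip -l assembly/target/scala-2.10/spark-assembly-*.jar | grep 'org/spark-project/guava'
    # ...while Optional stays at its un-shaded name for the Java API
    $ unzip -l assembly/target/scala-2.10/spark-assembly-*.jar | grep 'com/google/common/base/Optional'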

bin/spark-class

Lines changed: 44 additions & 12 deletions
@@ -17,6 +17,8 @@
 # limitations under the License.
 #
 
+# NOTE: Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala!
+
 cygwin=false
 case "`uname`" in
     CYGWIN*) cygwin=true;;
@@ -39,7 +41,7 @@ fi
 
 if [ -n "$SPARK_MEM" ]; then
   echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2
-  echo -e "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." 1>&2
+  echo -e "(e.g., spark.executor.memory or spark.driver.memory)." 1>&2
 fi
 
 # Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
@@ -73,11 +75,17 @@ case "$1" in
     OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
     ;;
 
-  # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
-  'org.apache.spark.deploy.SparkSubmit')
-    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \
-      -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+  # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
+  # SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
+  'org.apache.spark.deploy.SparkSubmit')
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
     OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
+    if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
+      OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+    fi
+    if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then
+      OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY"
+    fi
     ;;
 
   *)
@@ -97,15 +105,21 @@ else
     exit 1
   fi
 fi
+JAVA_VERSION=$($RUNNER -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
 
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
-JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
+if [ "$JAVA_VERSION" -ge 18 ]; then
+  JAVA_OPTS="$OUR_JAVA_OPTS"
+else
+  JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
+fi
 JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
+
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
   JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
 fi
-export JAVA_OPTS
+
 # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
 
 TOOLS_DIR="$FWDIR"/tools
@@ -146,10 +160,28 @@ if $cygwin; then
 fi
 export CLASSPATH
 
-if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
-  echo -n "Spark Command: " 1>&2
-  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
-  echo -e "========================================\n" 1>&2
+# In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
+# Here we must parse the properties file for relevant "spark.driver.*" configs before launching
+# the driver JVM itself. Instead of handling this complexity in Bash, we launch a separate JVM
+# to prepare the launch environment of this driver JVM.
+
+if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
+  # This is used only if the properties file actually contains these special configs
+  # Export the environment variables needed by SparkSubmitDriverBootstrapper
+  export RUNNER
+  export CLASSPATH
+  export JAVA_OPTS
+  export OUR_JAVA_MEM
+  export SPARK_CLASS=1
+  shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
+  exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
+else
+  # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
+  if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
+    echo -n "Spark Command: " 1>&2
+    echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
+    echo -e "========================================\n" 1>&2
+  fi
+  exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
 fi
 
-exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"

bin/spark-class2.cmd

Lines changed: 7 additions & 1 deletion
@@ -77,7 +77,13 @@ rem All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SP
 )
 
 rem Set JAVA_OPTS to be able to load native libraries and to set heap size
-set JAVA_OPTS=-XX:MaxPermSize=128m %OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
+for /f "tokens=3" %%i in ('java -version 2^>^&1 ^| find "version"') do set jversion=%%i
+for /f "tokens=1 delims=_" %%i in ("%jversion:~1,-1%") do set jversion=%%i
+if "%jversion%" geq "1.8.0" (
+  set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
+) else (
+  set JAVA_OPTS=-XX:MaxPermSize=128m %OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
+)
 rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
 
 rem Test whether the user has built Spark

bin/spark-shell

Lines changed: 18 additions & 18 deletions
@@ -22,7 +22,7 @@
 
 cygwin=false
 case "`uname`" in
-  CYGWIN*) cygwin=true;;
+    CYGWIN*) cygwin=true;;
 esac
 
 # Enter posix mode for bash
@@ -32,9 +32,9 @@ set -o posix
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
 function usage() {
-  echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
-  exit 0
+    echo "Usage: ./bin/spark-shell [options]"
+    $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+    exit 0
 }
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
@@ -46,20 +46,20 @@ SUBMIT_USAGE_FUNCTION=usage
 gatherSparkSubmitOpts "$@"
 
 function main() {
-  if $cygwin; then
-    # Workaround for issue involving JLine and Cygwin
-    # (see http://sourceforge.net/p/jline/bugs/40/).
-    # If you're using the Mintty terminal emulator in Cygwin, may need to set the
-    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
-    # (see https://github.com/sbt/sbt/issues/562).
-    stty -icanon min 1 -echo > /dev/null 2>&1
-    export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
-    stty icanon echo > /dev/null 2>&1
-  else
-    export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
-  fi
+    if $cygwin; then
+        # Workaround for issue involving JLine and Cygwin
+        # (see http://sourceforge.net/p/jline/bugs/40/).
+        # If you're using the Mintty terminal emulator in Cygwin, may need to set the
+        # "Backspace sends ^H" setting in "Keys" section of the Mintty options
+        # (see https://github.com/sbt/sbt/issues/562).
+        stty -icanon min 1 -echo > /dev/null 2>&1
+        export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
+        $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+        stty icanon echo > /dev/null 2>&1
+    else
+        export SPARK_SUBMIT_OPTS
+        $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+    fi
 }
 
 # Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in

bin/spark-submit

Lines changed: 23 additions & 5 deletions
@@ -17,14 +17,18 @@
 # limitations under the License.
 #
 
+# NOTE: Any changes in this file must be reflected in SparkClassLauncher.scala!
+
 export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
 ORIG_ARGS=("$@")
 
 while (($#)); do
   if [ "$1" = "--deploy-mode" ]; then
-    DEPLOY_MODE=$2
+    SPARK_SUBMIT_DEPLOY_MODE=$2
+  elif [ "$1" = "--properties-file" ]; then
+    SPARK_SUBMIT_PROPERTIES_FILE=$2
   elif [ "$1" = "--driver-memory" ]; then
-    DRIVER_MEMORY=$2
+    export SPARK_SUBMIT_DRIVER_MEMORY=$2
   elif [ "$1" = "--driver-library-path" ]; then
     export SPARK_SUBMIT_LIBRARY_PATH=$2
   elif [ "$1" = "--driver-class-path" ]; then
@@ -35,10 +39,24 @@ while (($#)); do
   shift
 done
 
-DEPLOY_MODE=${DEPLOY_MODE:-"client"}
+DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
+export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"}
+export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}
+
+# For client mode, the driver will be launched in the same JVM that launches
+# SparkSubmit, so we may need to read the properties file for any extra class
+# paths, library paths, java options and memory early on. Otherwise, it will
+# be too late by the time the driver JVM has started.
 
-if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then
-  export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
+if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FILE" ]]; then
+  # Parse the properties file only if the special configs exist
+  contains_special_configs=$(
+    grep -e "spark.driver.extra*\|spark.driver.memory" "$SPARK_SUBMIT_PROPERTIES_FILE" | \
+    grep -v "^[[:space:]]*#"
+  )
+  if [ -n "$contains_special_configs" ]; then
+    export SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
+  fi
 fi
 
 exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
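The bootstrap flag is set only when the properties file contains uncommented spark.driver.extra* or spark.driver.memory entries. A quick sketch of the trigger condition, using a throwaway properties file with illustrative contents:

    $ cat > /tmp/spark-defaults.conf <<'EOF'
    # spark.driver.memory  2g
    spark.driver.memory          5g
    spark.driver.extraClassPath  /opt/extra/lib/*
    EOF
    $ grep -e "spark.driver.extra*\|spark.driver.memory" /tmp/spark-defaults.conf | grep -v "^[[:space:]]*#"
    spark.driver.memory          5g
    spark.driver.extraClassPath  /opt/extra/lib/*

Both uncommented lines survive the filter, so SPARK_SUBMIT_BOOTSTRAP_DRIVER would be exported and spark-class would hand off to SparkSubmitDriverBootstrapper.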

bin/utils.sh

File mode changed: 100644 → 100755

conf/spark-defaults.conf.template

Lines changed: 6 additions & 4 deletions
@@ -2,7 +2,9 @@
 # This is useful for setting default environmental settings.
 
 # Example:
-# spark.master            spark://master:7077
-# spark.eventLog.enabled  true
-# spark.eventLog.dir      hdfs://namenode:8021/directory
-# spark.serializer        org.apache.spark.serializer.KryoSerializer
+# spark.master                     spark://master:7077
+# spark.eventLog.enabled           true
+# spark.eventLog.dir               hdfs://namenode:8021/directory
+# spark.serializer                 org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory              5g
+# spark.executor.extraJavaOptions  -XX:+PrintGCDetail -Dkey=value -Dnumbers="one two three"

core/pom.xml

Lines changed: 35 additions & 0 deletions
@@ -68,9 +68,15 @@
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-server</artifactId>
     </dependency>
+    <!--
+      Promote Guava to "compile" so that maven-shade-plugin picks it up (for packaging the Optional
+      class exposed in the Java API). The plugin will then remove this dependency from the published
+      pom, so that Guava does not pollute the client's compilation classpath.
+    -->
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
+      <scope>compile</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.commons</groupId>
@@ -322,6 +328,35 @@
           </arguments>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <shadedArtifactAttached>false</shadedArtifactAttached>
+              <artifactSet>
+                <includes>
+                  <include>com.google.guava:guava</include>
+                </includes>
+              </artifactSet>
+              <filters>
+                <!-- See comment in the guava dependency declaration above. -->
+                <filter>
+                  <artifact>com.google.guava:guava</artifact>
+                  <includes>
+                    <include>com/google/common/base/Optional*</include>
+                  </includes>
+                </filter>
+              </filters>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
 
     <resources>
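Unlike the assembly module, core shades in only the Optional* classes, so users of the Java API resolve com.google.common.base.Optional from spark-core itself without inheriting all of Guava. A hedged spot-check against the built artifact; the jar name and Scala version are assumptions:

    # Only Optional and its inner classes (e.g. Optional$Present and Optional$Absent
    # in the Guava version of this era) should show up under com/google/common
    $ unzip -l core/target/spark-core_2.10-*.jar | grep 'com/google/common'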
