Commit a91ea19
Fix precedence of library paths, classpath, java opts and memory
This was previously broken because of the way we pass command line arguments. As of this commit, the ordering becomes:

SPARK_SUBMIT_DRIVER_MEMORY > spark.driver.memory > SPARK_DRIVER_MEMORY
SPARK_SUBMIT_CLASSPATH    > spark.driver.extraClassPath
SPARK_SUBMIT_LIBRARY_PATH > spark.driver.extraLibraryPath
SPARK_SUBMIT_JAVA_OPTS    > spark.driver.extraJavaOptions

We achieve this by passing existing environment variables to SparkClassLauncher directly.
1 parent 158f813 commit a91ea19
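For illustration, here is a minimal Scala sketch (not part of the commit; all names are illustrative, not Spark API) of the driver-memory resolution order the message describes:

// Sketch of the precedence chain: submit flag > properties file > env var > default.
object DriverMemoryPrecedence {
  def resolve(
      submitDriverMemory: Option[String], // SPARK_SUBMIT_DRIVER_MEMORY (from --driver-memory)
      confDriverMemory: Option[String],   // spark.driver.memory (from the properties file)
      envDriverMemory: Option[String],    // SPARK_DRIVER_MEMORY
      default: String): String = {
    submitDriverMemory
      .orElse(confDriverMemory)
      .orElse(envDriverMemory)
      .getOrElse(default)
  }

  def main(args: Array[String]): Unit = {
    println(resolve(Some("4g"), Some("2g"), Some("1g"), "512m")) // "4g": submit flag wins
    println(resolve(None, Some("2g"), Some("1g"), "512m"))       // "2g": properties file wins
    println(resolve(None, None, Some("1g"), "512m"))             // "1g": env var wins
  }
}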

3 files changed: +117 −97 lines changed

bin/spark-class

Lines changed: 28 additions & 24 deletions
@@ -17,6 +17,8 @@
 # limitations under the License.
 #

+# NOTE: Any changes to this file must be reflected in SparkClassLauncher.scala!
+
 cygwin=false
 case "`uname`" in
     CYGWIN*) cygwin=true;;
@@ -73,13 +75,16 @@ case "$1" in
     OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
     ;;

-  # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
-  'org.apache.spark.deploy.SparkSubmit')
-    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
+  # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_JAVA_OPTS + SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
+  'org.apache.spark.deploy.SparkSubmit')
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_JAVA_OPTS"
+    OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
     if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
       OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
     fi
-    OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
+    if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then
+      OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY"
+    fi
     ;;

   *)
@@ -102,7 +107,6 @@ fi

 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
-JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"

 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
@@ -149,27 +153,27 @@ if $cygwin; then
 fi
 export CLASSPATH

-if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
-  echo -n "Spark Command: " 1>&2
-  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
-  echo -e "========================================\n" 1>&2
-fi
-
 # In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
-# Here we must parse the properties file for relevant "spark.driver.*" configs for launching
-# the driver JVM itself.
-
-if [ -n "$SPARK_SUBMIT_CLIENT_MODE" ]; then
-  # This is currently used only if the properties file actually contains these special configs
-  exec "$RUNNER" org.apache.spark.deploy.SparkClassLauncher \
-    "$PROPERTIES_FILE" \
-    "$RUNNER" \
-    "$CLASSPATH" \
-    "$SPARK_SUBMIT_LIBRARY_PATH" \
-    "$JAVA_OPTS" \
-    "$OUR_JAVA_MEM" \
-    "$@"
+# Here we must parse the properties file for relevant "spark.driver.*" configs before launching
+# the driver JVM itself. Instead of handling this complexity in BASH, we launch a separate JVM
+# to prepare the launch environment of this driver JVM.
+
+if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
+  # This is used only if the properties file actually contains these special configs
+  # Export the environment variables needed by SparkClassLauncher
+  export RUNNER
+  export CLASSPATH
+  export JAVA_OPTS
+  export OUR_JAVA_MEM
+  shift
+  exec "$RUNNER" org.apache.spark.deploy.SparkClassLauncher "$@"
 else
+  JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
+  if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
+    echo -n "Spark Command: " 1>&2
+    echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
+    echo -e "========================================\n" 1>&2
+  fi
   exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
 fi
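Since JAVA_OPTS crosses the bash/JVM boundary as a single string, the launcher must tokenize it before building the child command. A simplified Scala sketch of that splitting (the real Utils.splitCommandString also honors quoting and escapes, which this sketch deliberately omits):

// Naive whitespace split; assumes no quoted options containing spaces.
def splitJavaOpts(opts: String): Seq[String] =
  opts.trim.split("\\s+").filter(_.nonEmpty).toSeq

// splitJavaOpts("-XX:MaxPermSize=128m  -Dspark.foo=bar")
//   == Seq("-XX:MaxPermSize=128m", "-Dspark.foo=bar")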

bin/spark-submit

Lines changed: 12 additions & 14 deletions
@@ -17,47 +17,45 @@
 # limitations under the License.
 #

+# NOTE: Any changes in this file must be reflected in SparkClassLauncher.scala!
+
 export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
 ORIG_ARGS=("$@")

 while (($#)); do
   if [ "$1" = "--deploy-mode" ]; then
-    DEPLOY_MODE=$2
-  elif [ "$1" = "--driver-memory" ]; then
-    DRIVER_MEMORY=$2
+    SPARK_SUBMIT_DEPLOY_MODE=$2
   elif [ "$1" = "--properties-file" ]; then
-    PROPERTIES_FILE=$2
+    SPARK_SUBMIT_PROPERTIES_FILE=$2
+  elif [ "$1" = "--driver-memory" ]; then
+    export SPARK_SUBMIT_DRIVER_MEMORY=$2
   elif [ "$1" = "--driver-library-path" ]; then
     export SPARK_SUBMIT_LIBRARY_PATH=$2
   elif [ "$1" = "--driver-class-path" ]; then
     export SPARK_SUBMIT_CLASSPATH=$2
   elif [ "$1" = "--driver-java-options" ]; then
-    export SPARK_SUBMIT_OPTS=$2
+    export SPARK_SUBMIT_JAVA_OPTS=$2
   fi
   shift
 done

-DEPLOY_MODE=${DEPLOY_MODE:-"client"}
 DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
-PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}
+export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"}
+export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}

 # For client mode, the driver will be launched in the same JVM that launches
 # SparkSubmit, so we may need to read the properties file for any extra class
 # paths, library paths, java options and memory early on. Otherwise, it will
 # be too late by the time the JVM has started.

-if [ "$DEPLOY_MODE" == "client" ]; then
-  if [ -n "$DRIVER_MEMORY" ]; then
-    export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
-  fi
+if [ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" ]; then
   # Parse the properties file only if the special configs exist
   contains_special_configs=$(
-    grep -e "spark.driver.extra*\|spark.driver.memory" "$PROPERTIES_FILE" | \
+    grep -e "spark.driver.extra*\|spark.driver.memory" "$SPARK_SUBMIT_PROPERTIES_FILE" | \
      grep -v "^[[:space:]]*#"
   )
   if [ -n "$contains_special_configs" ]; then
-    export PROPERTIES_FILE
-    export SPARK_SUBMIT_CLIENT_MODE=1
+    export SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
   fi
 fi
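The effect of the argument scan above can be summarized as a flag-to-environment-variable mapping. A Scala sketch of the same scan (illustrative only; the real logic lives in the bash loop above):

object SubmitArgScan {
  // Each flag is exported as the corresponding SPARK_SUBMIT_* variable.
  val flagToEnv = Map(
    "--deploy-mode"         -> "SPARK_SUBMIT_DEPLOY_MODE",
    "--properties-file"     -> "SPARK_SUBMIT_PROPERTIES_FILE",
    "--driver-memory"       -> "SPARK_SUBMIT_DRIVER_MEMORY",
    "--driver-library-path" -> "SPARK_SUBMIT_LIBRARY_PATH",
    "--driver-class-path"   -> "SPARK_SUBMIT_CLASSPATH",
    "--driver-java-options" -> "SPARK_SUBMIT_JAVA_OPTS")

  def scan(args: Seq[String]): Map[String, String] =
    args.sliding(2).collect {
      case Seq(flag, value) if flagToEnv.contains(flag) => flagToEnv(flag) -> value
    }.toMap

  def main(args: Array[String]): Unit = {
    // "--driver-memory 4g" maps to SPARK_SUBMIT_DRIVER_MEMORY=4g
    println(scan(Seq("--master", "local", "--driver-memory", "4g")))
  }
}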

core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala

Lines changed: 77 additions & 59 deletions
@@ -24,76 +24,94 @@ import scala.collection.JavaConversions._
 import org.apache.spark.util.{RedirectThread, Utils}

 /**
- * Wrapper of `bin/spark-class` that prepares the launch environment of the child JVM properly.
+ * Launch an application through Spark submit in client mode with the appropriate classpath,
+ * library paths, java options and memory. These properties of the JVM must be set before the
+ * driver JVM is launched. The sole purpose of this class is to avoid handling the complexity
+ * of parsing the properties file for such relevant configs in BASH.
+ *
+ * Usage: org.apache.spark.deploy.SparkClassLauncher <application args>
 */
 private[spark] object SparkClassLauncher {

-  // TODO: This is currently only used for running Spark submit in client mode.
-  // The goal moving forward is to use this class for all use cases of `bin/spark-class`.
+  // Note: This class depends on the behavior of `bin/spark-class` and `bin/spark-submit`.
+  // Any changes made there must be reflected in this file.

-  /**
-   * Launch a Spark class with the given class paths, library paths, java options and memory,
-   * taking into account special `spark.driver.*` properties needed to start the driver JVM.
-   */
   def main(args: Array[String]): Unit = {
-    if (args.size < 7) {
-      System.err.println(
-        """
-          |Usage: org.apache.spark.deploy.SparkClassLauncher
-          |
-          |  [properties file]      - path to your Spark properties file
-          |  [java runner]          - command to launch the child JVM
-          |  [java class paths]     - class paths to pass to the child JVM
-          |  [java library paths]   - library paths to pass to the child JVM
-          |  [java opts]            - java options to pass to the child JVM
-          |  [java memory]          - memory used to launch the child JVM
-          |  [main class]           - main class to run in the child JVM
-          |  <main args>            - arguments passed to this main class
-          |
-          |Example:
-          |  org.apache.spark.deploy.SparkClassLauncher.SparkClassLauncher
-          |    conf/spark-defaults.conf java /classpath1:/classpath2 /librarypath1:/librarypath2
-          |    "-XX:-UseParallelGC -Dsome=property" 5g org.apache.spark.deploy.SparkSubmit
-          |    --master local --class org.apache.spark.examples.SparkPi 10
-        """.stripMargin)
-      System.exit(1)
-    }
-    val propertiesFile = args(0)
-    val javaRunner = args(1)
-    val clClassPaths = args(2)
-    val clLibraryPaths = args(3)
-    val clJavaOpts = Utils.splitCommandString(args(4))
-    val clJavaMemory = args(5)
-    val mainClass = args(6)
+    val submitArgs = args
+    val runner = sys.env("RUNNER")
+    val classpath = sys.env("CLASSPATH")
+    val javaOpts = sys.env("JAVA_OPTS")
+    val defaultDriverMemory = sys.env("OUR_JAVA_MEM")
+
+    // Spark submit specific environment variables
+    val deployMode = sys.env("SPARK_SUBMIT_DEPLOY_MODE")
+    val propertiesFile = sys.env("SPARK_SUBMIT_PROPERTIES_FILE")
+    val bootstrapDriver = sys.env("SPARK_SUBMIT_BOOTSTRAP_DRIVER")
+    val submitDriverMemory = sys.env.get("SPARK_SUBMIT_DRIVER_MEMORY")
+    val submitLibraryPath = sys.env.get("SPARK_SUBMIT_LIBRARY_PATH")
+    val submitClasspath = sys.env.get("SPARK_SUBMIT_CLASSPATH")
+    val submitJavaOpts = sys.env.get("SPARK_SUBMIT_JAVA_OPTS")
+
+    assume(runner != null, "RUNNER must be set")
+    assume(classpath != null, "CLASSPATH must be set")
+    assume(javaOpts != null, "JAVA_OPTS must be set")
+    assume(defaultDriverMemory != null, "OUR_JAVA_MEM must be set")
+    assume(deployMode == "client", "SPARK_SUBMIT_DEPLOY_MODE must be \"client\"!")
+    assume(propertiesFile != null, "SPARK_SUBMIT_PROPERTIES_FILE must be set")
+    assume(bootstrapDriver != null, "SPARK_SUBMIT_BOOTSTRAP_DRIVER must be set!")

-    // In client deploy mode, parse the properties file for certain `spark.driver.*` configs.
-    // These configs encode java options, class paths, and library paths needed to launch the JVM.
+    // Parse the properties file for the equivalent spark.driver.* configs
     val properties = SparkSubmitArguments.getPropertiesFromFile(new File(propertiesFile)).toMap
-    val confDriverMemory = properties.get("spark.driver.memory")
-    val confClassPaths = properties.get("spark.driver.extraClassPath")
-    val confLibraryPaths = properties.get("spark.driver.extraLibraryPath")
-    val confJavaOpts = properties.get("spark.driver.extraJavaOptions")
+    val confDriverMemory = properties.get("spark.driver.memory").getOrElse(defaultDriverMemory)
+    val confLibraryPath = properties.get("spark.driver.extraLibraryPath").getOrElse("")
+    val confClasspath = properties.get("spark.driver.extraClassPath").getOrElse("")
+    val confJavaOpts = properties.get("spark.driver.extraJavaOptions").getOrElse("")

-    // Merge relevant command line values with the config equivalents, if any
-    val javaMemory = confDriverMemory.getOrElse(clJavaMemory)
-    val pathSeparator = sys.props("path.separator")
-    val classPaths = clClassPaths + confClassPaths.map(pathSeparator + _).getOrElse("")
-    val libraryPaths = clLibraryPaths + confLibraryPaths.map(pathSeparator + _).getOrElse("")
-    val javaOpts = clJavaOpts ++ confJavaOpts.map(Utils.splitCommandString).getOrElse(Seq.empty)
-    val filteredJavaOpts = javaOpts.distinct.filterNot { opt =>
-      opt.startsWith("-Djava.library.path") || opt.startsWith("-Xms") || opt.startsWith("-Xmx")
-    }
+    // Favor Spark submit arguments over the equivalent configs in the properties file.
+    // Note that we do not actually use the Spark submit values for library path, classpath,
+    // and java opts here, because we have already captured them in BASH.
+    val newDriverMemory = submitDriverMemory.getOrElse(confDriverMemory)
+    val newLibraryPath =
+      if (submitLibraryPath.isDefined) {
+        // SPARK_SUBMIT_LIBRARY_PATH is already captured in JAVA_OPTS
+        ""
+      } else {
+        "-Djava.library.path=" + confLibraryPath
+      }
+    val newClasspath =
+      if (submitClasspath.isDefined) {
+        // SPARK_SUBMIT_CLASSPATH is already captured in CLASSPATH
+        classpath
+      } else {
+        classpath + sys.props("path.separator") + confClasspath
+      }
+    val newJavaOpts =
+      if (submitJavaOpts.isDefined) {
+        // SPARK_SUBMIT_JAVA_OPTS is already captured in JAVA_OPTS
+        javaOpts
+      } else {
+        javaOpts + " " + confJavaOpts
+      }

     // Build up command
     val command: Seq[String] =
-      Seq(javaRunner) ++
-      { if (classPaths.nonEmpty) Seq("-cp", classPaths) else Seq.empty } ++
-      { if (libraryPaths.nonEmpty) Seq(s"-Djava.library.path=$libraryPaths") else Seq.empty } ++
-      filteredJavaOpts ++
-      Seq(s"-Xms$javaMemory", s"-Xmx$javaMemory") ++
-      Seq(mainClass) ++
-      args.slice(7, args.size)
-    val builder = new ProcessBuilder(command)
+      Seq(runner) ++
+      Seq("-cp", newClasspath) ++
+      Seq(newLibraryPath) ++
+      Utils.splitCommandString(newJavaOpts) ++
+      Seq(s"-Xms$newDriverMemory", s"-Xmx$newDriverMemory") ++
+      Seq("org.apache.spark.deploy.SparkSubmit") ++
+      submitArgs
+
+    // Print the launch command. This follows closely the format used in `bin/spark-class`.
+    if (sys.env.contains("SPARK_PRINT_LAUNCH_COMMAND")) {
+      System.err.print("Spark Command: ")
+      System.err.println(command.mkString(" "))
+      System.err.println("========================================\n")
+    }
+
+    val filteredCommand = command.filter(_.nonEmpty)
+    val builder = new ProcessBuilder(filteredCommand)
     val process = builder.start()
     new RedirectThread(System.in, process.getOutputStream, "redirect stdin").start()
     new RedirectThread(process.getInputStream, System.out, "redirect stdout").start()
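For concreteness, here is a worked Scala sketch (hypothetical values, not from the commit) of the command the launcher assembles, showing why empty entries must be filtered out before launch:

object LaunchCommandExample {
  def main(args: Array[String]): Unit = {
    val command = Seq(
      "java",
      "-cp", "/spark/conf:/spark/lib/*",
      "",                                 // library path entry is empty: --driver-library-path was set
      "-XX:MaxPermSize=128m", "-Dspark.ui.port=4444",
      "-Xms4g", "-Xmx4g",
      "org.apache.spark.deploy.SparkSubmit",
      "--master", "local", "--class", "org.apache.spark.examples.SparkPi", "10")

    // ProcessBuilder would treat "" as a real (invalid) argument, so the launcher
    // drops empty strings first, mirroring command.filter(_.nonEmpty) above.
    println(command.filter(_.nonEmpty).mkString(" "))
  }
}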
