Commit 83ebe60

Parse special driver configs in Windows (broken)
Note that this is still broken. There is an issue with using SparkSubmitDriverBootstrapper on Windows: stdin is not being picked up properly by the SparkSubmit subprocess. This must be fixed before the PR is merged.
1 parent: 52fbdc2 · commit: 83ebe60
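The stdin symptom described in the commit message is typical of a wrapper JVM that starts SparkSubmit as a child process without wiring the parent's standard streams through to it. As a hedged illustration only (this is not the commit's actual SparkSubmitDriverBootstrapper code, and the object name is invented), a launcher that inherits stdin/stdout/stderr would look roughly like this:

// Hypothetical sketch, not the actual SparkSubmitDriverBootstrapper.scala from this commit.
// It shows one way a bootstrapper JVM can hand its own stdin/stdout/stderr to the
// SparkSubmit child process so that interactive input is not lost on Windows.
import java.lang.ProcessBuilder.Redirect

object StdinForwardingSketch {
  def main(args: Array[String]): Unit = {
    val command = Seq(
      "java", "-cp", sys.env.getOrElse("CLASSPATH", ""),
      "org.apache.spark.deploy.SparkSubmit") ++ args

    val builder = new ProcessBuilder(command: _*)
    builder.redirectInput(Redirect.INHERIT)   // child reads the console directly
    builder.redirectOutput(Redirect.INHERIT)
    builder.redirectError(Redirect.INHERIT)

    val exitCode = builder.start().waitFor()
    sys.exit(exitCode)
  }
}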

File tree

2 files changed (+62, -17 lines)


bin/spark-class2.cmd

File mode changed from 100755 to 100644
Lines changed: 39 additions & 7 deletions
@@ -17,6 +17,8 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem
 
+rem Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala!
+
 setlocal enabledelayedexpansion
 
 set SCALA_VERSION=2.10
@@ -38,7 +40,7 @@ if not "x%1"=="x" goto arg_given
 
 if not "x%SPARK_MEM%"=="x" (
   echo Warning: SPARK_MEM is deprecated, please use a more specific config option
-  echo e.g., spark.executor.memory or SPARK_DRIVER_MEMORY.
+  echo e.g., spark.executor.memory or spark.driver.memory.
 )
 
 rem Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
@@ -67,10 +69,18 @@ rem Executors use SPARK_JAVA_OPTS + SPARK_EXECUTOR_MEMORY.
   set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% %SPARK_EXECUTOR_OPTS%
   if not "x%SPARK_EXECUTOR_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_EXECUTOR_MEMORY%
 
-rem All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SPARK_REPL_OPTS.
-) else if "%1"=="org.apache.spark.repl.Main" (
-  set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% %SPARK_REPL_OPTS%
+rem Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
+rem SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
+rem The repl also uses SPARK_REPL_OPTS.
+) else if "%1"=="org.apache.spark.deploy.SparkSubmit" (
+  set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% %SPARK_SUBMIT_OPTS% %SPARK_REPL_OPTS%
+  if not "x%SPARK_SUBMIT_LIBRARY_PATH%"=="x" (
+    set OUR_JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_SUBMIT_LIBRARY_PATH%
+  ) else if not "x%SPARK_LIBRARY_PATH%"=="x" (
+    set OUR_JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH%
+  )
   if not "x%SPARK_DRIVER_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_DRIVER_MEMORY%
+  if not "x%SPARK_SUBMIT_DRIVER_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_SUBMIT_DRIVER_MEMORY%
 ) else (
   set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS%
   if not "x%SPARK_DRIVER_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_DRIVER_MEMORY%
@@ -80,9 +90,9 @@ rem Set JAVA_OPTS to be able to load native libraries and to set heap size
 for /f "tokens=3" %%i in ('java -version 2^>^&1 ^| find "version"') do set jversion=%%i
 for /f "tokens=1 delims=_" %%i in ("%jversion:~1,-1%") do set jversion=%%i
 if "%jversion%" geq "1.8.0" (
-  set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
+  set JAVA_OPTS=%OUR_JAVA_OPTS% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
 ) else (
-  set JAVA_OPTS=-XX:MaxPermSize=128m %OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
+  set JAVA_OPTS=-XX:MaxPermSize=128m %OUR_JAVA_OPTS% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
 )
 rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
 
@@ -115,5 +125,27 @@ rem Figure out where java is.
 set RUNNER=java
 if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
 
-"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
+rem In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
+rem Here we must parse the properties file for relevant "spark.driver.*" configs before launching
+rem the driver JVM itself. Instead of handling this complexity in Bash, we launch a separate JVM
+rem to prepare the launch environment of this driver JVM.
+
+rem In this case, leave out the main class (org.apache.spark.deploy.SparkSubmit) and use our own.
+rem Leaving out the first argument is surprisingly difficult to do in Windows. Note that this must
+rem be done here because the Windows "shift" command does not work in a conditional block.
+set BOOTSTRAP_ARGS=
+shift
+:start_parse
+if "%~1" == "" goto end_parse
+set BOOTSTRAP_ARGS=%BOOTSTRAP_ARGS% %~1
+shift
+goto start_parse
+:end_parse
+
+if not [%SPARK_SUBMIT_BOOTSTRAP_DRIVER%] == [] (
+  set SPARK_CLASS=1
+  "%RUNNER%" org.apache.spark.deploy.SparkSubmitDriverBootstrapper %BOOTSTRAP_ARGS%
+) else (
+  "%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
+)
 :exit
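
The rem comments in the new block above spell out the design: rather than parse the properties file for "spark.driver.*" settings in cmd, the script forwards its arguments to a separate bootstrapper JVM that prepares the driver's launch environment and then starts SparkSubmit. A rough, hedged sketch of that idea follows; the object name, defaults, and classpath handling are assumptions, not the commit's actual SparkSubmitDriverBootstrapper.scala:

// Illustrative sketch of a driver bootstrapper; the real SparkSubmitDriverBootstrapper.scala
// in this commit may differ in its details. spark.driver.memory and
// spark.driver.extraJavaOptions are real Spark configs; everything else here is assumed.
import java.io.File
import java.util.Properties
import scala.collection.JavaConverters._
import scala.io.Source

object DriverBootstrapSketch {
  def main(args: Array[String]): Unit = {
    // spark-submit.cmd exports SPARK_SUBMIT_PROPERTIES_FILE; fall back to the usual default.
    val propsFile = sys.env.getOrElse(
      "SPARK_SUBMIT_PROPERTIES_FILE",
      sys.env.getOrElse("SPARK_HOME", ".") + "\\conf\\spark-defaults.conf")

    // Read the "spark.driver.*" settings that must be applied before the driver JVM starts.
    val props = new Properties()
    if (new File(propsFile).isFile) {
      val source = Source.fromFile(propsFile)
      try props.load(source.bufferedReader()) finally source.close()
    }
    val driverMemory = Option(props.getProperty("spark.driver.memory")).getOrElse("512m")
    val extraJavaOpts = Option(props.getProperty("spark.driver.extraJavaOptions")).getOrElse("")

    // Relaunch SparkSubmit with the resolved memory and options baked into the driver JVM.
    val command =
      Seq("java", s"-Xms$driverMemory", s"-Xmx$driverMemory") ++
      extraJavaOpts.split("\\s+").filter(_.nonEmpty) ++
      Seq("-cp", sys.env.getOrElse("CLASSPATH", ""), "org.apache.spark.deploy.SparkSubmit") ++
      args

    val process = new ProcessBuilder(command.asJava).inheritIO().start()
    sys.exit(process.waitFor())
  }
}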

bin/spark-submit.cmd

Lines changed: 23 additions & 10 deletions
@@ -17,23 +17,29 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem
 
+rem NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala!
+
 set SPARK_HOME=%~dp0..
 set ORIG_ARGS=%*
 
-rem Clear the values of all variables used
-set DEPLOY_MODE=
-set DRIVER_MEMORY=
+rem Reset the values of all variables used
+set SPARK_SUBMIT_DEPLOY_MODE=client
+set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf
+set SPARK_SUBMIT_DRIVER_MEMORY=
 set SPARK_SUBMIT_LIBRARY_PATH=
 set SPARK_SUBMIT_CLASSPATH=
 set SPARK_SUBMIT_OPTS=
 set SPARK_DRIVER_MEMORY=
+set SPARK_SUBMIT_BOOTSTRAP_DRIVER=
 
 :loop
 if [%1] == [] goto continue
 if [%1] == [--deploy-mode] (
-  set DEPLOY_MODE=%2
+  set SPARK_SUBMIT_DEPLOY_MODE=%2
+) else if [%1] == [--properties-file] (
+  set SPARK_SUBMIT_PROPERTIES_FILE=%2
 ) else if [%1] == [--driver-memory] (
-  set DRIVER_MEMORY=%2
+  set SPARK_SUBMIT_DRIVER_MEMORY=%2
 ) else if [%1] == [--driver-library-path] (
   set SPARK_SUBMIT_LIBRARY_PATH=%2
 ) else if [%1] == [--driver-class-path] (
@@ -45,12 +51,19 @@ if [%1] == [] goto continue
 goto loop
 :continue
 
-if [%DEPLOY_MODE%] == [] (
-  set DEPLOY_MODE=client
-)
+rem For client mode, the driver will be launched in the same JVM that launches
+rem SparkSubmit, so we may need to read the properties file for any extra class
+rem paths, library paths, java options and memory early on. Otherwise, it will
+rem be too late by the time the driver JVM has started.
 
-if not [%DRIVER_MEMORY%] == [] if [%DEPLOY_MODE%] == [client] (
-  set SPARK_DRIVER_MEMORY=%DRIVER_MEMORY%
+if [%SPARK_SUBMIT_DEPLOY_MODE%] == [client] (
+  if exist %SPARK_SUBMIT_PROPERTIES_FILE% (
+    rem Parse the properties file only if the special configs exist
+    for /f %%i in ('findstr /r /c:"^[\t ]*spark.driver.memory" /c:"^[\t ]*spark.driver.extra" ^
+      %SPARK_SUBMIT_PROPERTIES_FILE%') do (
+      set SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
+    )
+  )
 )
 
 cmd /V /E /C %SPARK_HOME%\bin\spark-class.cmd org.apache.spark.deploy.SparkSubmit %ORIG_ARGS%
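
The findstr call above only answers a yes/no question: does the properties file define spark.driver.memory or any spark.driver.extra* setting that must be applied before the driver JVM starts? A minimal Scala equivalent of that check, for readers unfamiliar with findstr regular expressions (the object and method names are made up for illustration):

import scala.io.Source

// Mirrors the findstr patterns above: returns true if the properties file defines any
// config that has to be applied to the driver JVM before it is launched.
object BootstrapCheckSketch {
  def needsBootstrap(propertiesFile: String): Boolean = {
    val source = Source.fromFile(propertiesFile)
    try {
      source.getLines().map(_.trim).exists { line =>
        line.startsWith("spark.driver.memory") || line.startsWith("spark.driver.extra")
      }
    } finally {
      source.close()
    }
  }
}

When the check succeeds, spark-submit.cmd sets SPARK_SUBMIT_BOOTSTRAP_DRIVER=1, and spark-class2.cmd then routes the launch through SparkSubmitDriverBootstrapper instead of invoking SparkSubmit directly.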
