
Commit 0effa1e

Add code in Scala that handles special configs
The eventual goal is to shift the current complex Bash logic into Scala. The new class should be invoked from `spark-class`. For simplicity, this does not yet handle SPARK-2914; that will likely be dealt with in a future PR instead.
1 parent c886568 commit 0effa1e
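For context, a hypothetical sketch (not part of this commit) of the positional argument contract the new launcher expects; the values mirror the example in the usage string of the new SparkClassLauncher below, and `LauncherArgsSketch` is an illustrative name only.

// Hypothetical illustration only: the eight positional arguments that bin/spark-class would
// pass to org.apache.spark.deploy.SparkClassLauncher, followed by the real main class's args.
object LauncherArgsSketch {
  def main(args: Array[String]): Unit = {
    val launcherArgs = Seq(
      "conf/spark-defaults.conf",             // [properties file]
      "java",                                 // [java runner]
      "/classpath1:/classpath2",              // [java class paths]
      "/librarypath1:/librarypath2",          // [java library paths]
      "-XX:-UseParallelGC -Dsome=property",   // [java opts]
      "5g",                                   // [java memory]
      "true",                                 // [client mode]
      "org.apache.spark.deploy.SparkSubmit"   // [main class]
    ) ++ Seq("--master", "local", "--class", "org.apache.spark.examples.SparkPi", "10")
    println(launcherArgs.mkString(" "))
  }
}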


5 files changed: +142 −29 lines changed

core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala

Lines changed: 0 additions & 25 deletions
@@ -40,28 +40,3 @@ private[spark] object PythonUtils {
     paths.filter(_ != "").mkString(File.pathSeparator)
   }
 }
-
-
-/**
- * A utility class to redirect the child process's stdout or stderr.
- */
-private[spark] class RedirectThread(
-    in: InputStream,
-    out: OutputStream,
-    name: String)
-  extends Thread(name) {
-
-  setDaemon(true)
-  override def run() {
-    scala.util.control.Exception.ignoring(classOf[IOException]) {
-      // FIXME: We copy the stream on the level of bytes to avoid encoding problems.
-      val buf = new Array[Byte](1024)
-      var len = in.read(buf)
-      while (len != -1) {
-        out.write(buf, 0, len)
-        out.flush()
-        len = in.read(buf)
-      }
-    }
-  }
-}

core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala

Lines changed: 1 addition & 2 deletions
@@ -17,15 +17,14 @@
 
 package org.apache.spark.api.python
 
-import java.lang.Runtime
 import java.io.{DataOutputStream, DataInputStream, InputStream, OutputStreamWriter}
 import java.net.{InetAddress, ServerSocket, Socket, SocketException}
 
 import scala.collection.mutable
 import scala.collection.JavaConversions._
 
 import org.apache.spark._
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{RedirectThread, Utils}
 
 private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String, String])
   extends Logging {

core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala

Lines changed: 2 additions & 2 deletions
@@ -22,8 +22,8 @@ import java.net.URI
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.JavaConversions._
 
-import org.apache.spark.api.python.{PythonUtils, RedirectThread}
-import org.apache.spark.util.Utils
+import org.apache.spark.api.python.PythonUtils
+import org.apache.spark.util.{RedirectThread, Utils}
 
 /**
  * A main class used by spark-submit to launch Python applications. It executes python as a

core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala

Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy
+
+import java.io.File
+
+import scala.collection.JavaConversions._
+
+import org.apache.spark.util.{RedirectThread, Utils}
+
+/**
+ * Wrapper of `bin/spark-class` that prepares the launch environment of the child JVM properly.
+ */
+object SparkClassLauncher {
+
+  /**
+   * Launch a Spark class with the given class paths, library paths, java options and memory.
+   * If we are launching an application through Spark submit in client mode, we must also
+   * take into account special `spark.driver.*` properties needed to start the driver JVM.
+   */
+  def main(args: Array[String]): Unit = {
+    if (args.size < 8) {
+      System.err.println(
+        """
+          |Usage: org.apache.spark.deploy.SparkClassLauncher
+          |
+          |  [properties file]    - path to your Spark properties file
+          |  [java runner]        - command to launch the child JVM
+          |  [java class paths]   - class paths to pass to the child JVM
+          |  [java library paths] - library paths to pass to the child JVM
+          |  [java opts]          - java options to pass to the child JVM
+          |  [java memory]        - memory used to launch the child JVM
+          |  [client mode]        - whether the child JVM will run the Spark driver
+          |  [main class]         - main class to run in the child JVM
+          |  <main args>          - arguments passed to this main class
+          |
+          |Example:
+          |  org.apache.spark.deploy.SparkClassLauncher
+          |    conf/spark-defaults.conf java /classpath1:/classpath2 /librarypath1:/librarypath2
+          |    "-XX:-UseParallelGC -Dsome=property" 5g true org.apache.spark.deploy.SparkSubmit
+          |    --master local --class org.apache.spark.examples.SparkPi 10
+        """.stripMargin)
+      System.exit(1)
+    }
+    val propertiesFile = args(0)
+    val javaRunner = args(1)
+    val clClassPaths = args(2)
+    val clLibraryPaths = args(3)
+    val clJavaOpts = args(4)
+    val clJavaMemory = args(5)
+    val clientMode = args(6) == "true"
+    val mainClass = args(7)
+
+    // In client deploy mode, parse the properties file for certain `spark.driver.*` configs.
+    // These configs encode java options, class paths, and library paths needed to launch the JVM.
+    val properties =
+      if (clientMode) {
+        SparkSubmitArguments.getPropertiesFromFile(new File(propertiesFile)).toMap
+      } else {
+        Map[String, String]()
+      }
+    val confDriverMemory = properties.get("spark.driver.memory")
+    val confClassPaths = properties.get("spark.driver.extraClassPath")
+    val confLibraryPaths = properties.get("spark.driver.extraLibraryPath")
+    val confJavaOpts = properties.get("spark.driver.extraJavaOptions")
+
+    // Merge relevant command line values with the config equivalents, if any
+    val javaMemory =
+      if (clientMode) {
+        confDriverMemory.getOrElse(clJavaMemory)
+      } else {
+        clJavaMemory
+      }
+    val pathSeparator = sys.props("path.separator")
+    val classPaths = clClassPaths + confClassPaths.map(pathSeparator + _).getOrElse("")
+    val libraryPaths = clLibraryPaths + confLibraryPaths.map(pathSeparator + _).getOrElse("")
+    val javaOpts = Utils.splitCommandString(clJavaOpts) ++
+      confJavaOpts.map(Utils.splitCommandString).getOrElse(Seq.empty)
+    val filteredJavaOpts = javaOpts.filterNot { opt =>
+      opt.startsWith("-Djava.library.path") || opt.startsWith("-Xms") || opt.startsWith("-Xmx")
+    }
+
+    // Build up command
+    val command: Seq[String] =
+      Seq(javaRunner) ++
+      { if (classPaths.nonEmpty) Seq("-cp", classPaths) else Seq.empty } ++
+      { if (libraryPaths.nonEmpty) Seq(s"-Djava.library.path=$libraryPaths") else Seq.empty } ++
+      filteredJavaOpts ++
+      Seq(s"-Xms$javaMemory", s"-Xmx$javaMemory") ++
+      Seq(mainClass) ++
+      args.slice(8, args.size)
+
+    command.foreach(println)
+
+    val builder = new ProcessBuilder(command)
+    val process = builder.start()
+    new RedirectThread(System.in, process.getOutputStream, "redirect stdin").start()
+    new RedirectThread(process.getInputStream, System.out, "redirect stdout").start()
+    new RedirectThread(process.getErrorStream, System.err, "redirect stderr").start()
+    System.exit(process.waitFor())
+  }
+
+}
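
To make the merging rules above concrete, here is a minimal standalone sketch (not part of the commit; `MergeRulesSketch` is an illustrative name) of how the client-mode `spark.driver.*` values combine with the command-line values: `spark.driver.memory` overrides the memory flag, while extra class path entries are appended using the platform path separator.

// Standalone, illustrative sketch of the launcher's client-mode merge rules.
object MergeRulesSketch {
  def main(args: Array[String]): Unit = {
    // Values that would have come from the spark-class command line.
    val clJavaMemory = "1g"
    val clClassPaths = "/classpath1"
    // Values that would have been read from the properties file in client mode.
    val conf = Map(
      "spark.driver.memory" -> "5g",
      "spark.driver.extraClassPath" -> "/classpath2")

    // spark.driver.memory wins over the command-line memory.
    val javaMemory = conf.get("spark.driver.memory").getOrElse(clJavaMemory)
    // Extra class path entries are appended after the command-line ones.
    val sep = sys.props("path.separator")
    val classPaths = clClassPaths + conf.get("spark.driver.extraClassPath").map(sep + _).getOrElse("")

    println(s"-Xms$javaMemory -Xmx$javaMemory")   // -Xms5g -Xmx5g
    println(s"-cp $classPaths")                   // -cp /classpath1:/classpath2 (on Unix)
  }
}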

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 21 additions & 0 deletions
@@ -1421,3 +1421,24 @@ private[spark] object Utils extends Logging {
   }
 
 }
+
+/**
+ * A utility class to redirect the child process's stdout or stderr.
+ */
+private[spark] class RedirectThread(in: InputStream, out: OutputStream, name: String)
+  extends Thread(name) {
+
+  setDaemon(true)
+  override def run() {
+    scala.util.control.Exception.ignoring(classOf[IOException]) {
+      // FIXME: We copy the stream on the level of bytes to avoid encoding problems.
+      val buf = new Array[Byte](1024)
+      var len = in.read(buf)
+      while (len != -1) {
+        out.write(buf, 0, len)
+        out.flush()
+        len = in.read(buf)
+      }
+    }
+  }
+}
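
As a usage note, here is a minimal sketch of wiring up the relocated RedirectThread around a child process, mirroring what SparkClassLauncher does above. Since RedirectThread is private[spark], the sketch assumes it is compiled under the org.apache.spark package; `RedirectSketch` and the package choice are illustrative only.

// Assumption: placed under org.apache.spark so that private[spark] members are visible.
package org.apache.spark.examples

import org.apache.spark.util.RedirectThread

// Launch a child process and mirror its stdout/stderr onto the parent's streams,
// the same way SparkClassLauncher wires up the child JVM above.
object RedirectSketch {
  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "hello from child").start()
    new RedirectThread(process.getInputStream, System.out, "redirect stdout").start()
    new RedirectThread(process.getErrorStream, System.err, "redirect stderr").start()
    System.exit(process.waitFor())
  }
}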
