8 changes: 7 additions & 1 deletion bin/compute-classpath.cmd
@@ -36,7 +36,13 @@ rem Load environment variables from conf\spark-env.cmd, if it exists
if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

rem Build up classpath
set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH%;%FWDIR%conf
set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH%

if "x%SPARK_CONF_DIR%"!="x" (
set CLASSPATH=%CLASSPATH%;%SPARK_CONF_DIR%
) else (
set CLASSPATH=%CLASSPATH%;%FWDIR%conf
)

if exist "%FWDIR%RELEASE" (
for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
8 changes: 7 additions & 1 deletion bin/compute-classpath.sh
@@ -27,8 +27,14 @@ FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

. "$FWDIR"/bin/load-spark-env.sh

CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH"
Contributor: I would move this back down to where the comment is, but this is trivial; I can fix it myself when I merge it.


# Build up classpath
CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
if [ -n "$SPARK_CONF_DIR" ]; then
CLASSPATH="$CLASSPATH:$SPARK_CONF_DIR"
else
CLASSPATH="$CLASSPATH:$FWDIR/conf"
fi
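
A quick way to sanity-check the new branch above is to compare the conf entry the script emits with and without the override. This is only a rough sketch: it assumes `bin/compute-classpath.sh` still echoes the assembled classpath when run directly, and uses `/tmp/custom-conf` as a stand-in for a real configuration directory.

```sh
# Without the override, the conf entry should be $SPARK_HOME/conf
./bin/compute-classpath.sh | tr ':' '\n' | grep -i conf

# With SPARK_CONF_DIR set, the same entry should point at the custom directory
SPARK_CONF_DIR=/tmp/custom-conf ./bin/compute-classpath.sh | tr ':' '\n' | grep -i conf
```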

ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"

SparkSubmitArguments.scala
@@ -29,8 +29,9 @@ import org.apache.spark.util.Utils

/**
* Parses and encapsulates arguments from the spark-submit script.
* The env argument is used for testing.
*/
private[spark] class SparkSubmitArguments(args: Seq[String]) {
private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, String] = sys.env) {
var master: String = null
var deployMode: String = null
var executorMemory: String = null
@@ -86,20 +87,12 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
private def mergeSparkProperties(): Unit = {
// Use common defaults file, if not specified by user
if (propertiesFile == null) {
sys.env.get("SPARK_CONF_DIR").foreach { sparkConfDir =>
val sep = File.separator
val defaultPath = s"${sparkConfDir}${sep}spark-defaults.conf"
val file = new File(defaultPath)
if (file.exists()) {
propertiesFile = file.getAbsolutePath
}
}
}
val sep = File.separator
val sparkHomeConfig = env.get("SPARK_HOME").map(sparkHome => s"${sparkHome}${sep}conf")
val confDir = env.get("SPARK_CONF_DIR").orElse(sparkHomeConfig)

if (propertiesFile == null) {
sys.env.get("SPARK_HOME").foreach { sparkHome =>
val sep = File.separator
val defaultPath = s"${sparkHome}${sep}conf${sep}spark-defaults.conf"
confDir.foreach { sparkConfDir =>
val defaultPath = s"${sparkConfDir}${sep}spark-defaults.conf"
Contributor: +1 for this change. The previous code is duplicated.

val file = new File(defaultPath)
if (file.exists()) {
propertiesFile = file.getAbsolutePath
@@ -113,19 +106,18 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {

// Use properties file as fallback for values which have a direct analog to
// arguments in this script.
master = Option(master).getOrElse(properties.get("spark.master").orNull)
executorMemory = Option(executorMemory)
.getOrElse(properties.get("spark.executor.memory").orNull)
executorCores = Option(executorCores)
.getOrElse(properties.get("spark.executor.cores").orNull)
master = Option(master).orElse(properties.get("spark.master")).orNull
executorMemory = Option(executorMemory).orElse(properties.get("spark.executor.memory")).orNull
executorCores = Option(executorCores).orElse(properties.get("spark.executor.cores")).orNull
totalExecutorCores = Option(totalExecutorCores)
.getOrElse(properties.get("spark.cores.max").orNull)
name = Option(name).getOrElse(properties.get("spark.app.name").orNull)
jars = Option(jars).getOrElse(properties.get("spark.jars").orNull)
.orElse(properties.get("spark.cores.max"))
.orNull
name = Option(name).orElse(properties.get("spark.app.name")).orNull
jars = Option(jars).orElse(properties.get("spark.jars")).orNull

// This supports env vars in older versions of Spark
master = Option(master).getOrElse(System.getenv("MASTER"))
deployMode = Option(deployMode).getOrElse(System.getenv("DEPLOY_MODE"))
master = Option(master).orElse(env.get("MASTER")).orNull
deployMode = Option(deployMode).orElse(env.get("DEPLOY_MODE")).orNull

// Try to set main class from JAR if no --class argument is given
if (mainClass == null && !isPython && primaryResource != null) {
@@ -178,7 +170,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
}

if (master.startsWith("yarn")) {
val hasHadoopEnv = sys.env.contains("HADOOP_CONF_DIR") || sys.env.contains("YARN_CONF_DIR")
val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR")
if (!hasHadoopEnv && !Utils.isTesting) {
throw new Exception(s"When running with master '$master' " +
"either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.")
SparkSubmitSuite.scala
@@ -17,7 +17,7 @@

package org.apache.spark.deploy

import java.io.{File, OutputStream, PrintStream}
import java.io._

import scala.collection.mutable.ArrayBuffer

@@ -26,6 +26,7 @@ import org.apache.spark.deploy.SparkSubmit._
import org.apache.spark.util.Utils
import org.scalatest.FunSuite
import org.scalatest.Matchers
import com.google.common.io.Files

class SparkSubmitSuite extends FunSuite with Matchers {
def beforeAll() {
@@ -306,6 +307,21 @@ class SparkSubmitSuite extends FunSuite with Matchers {
runSparkSubmit(args)
}

test("SPARK_CONF_DIR overrides spark-defaults.conf") {
forConfDir(Map("spark.executor.memory" -> "2.3g")) { path =>
val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val args = Seq(
"--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"),
"--name", "testApp",
"--master", "local",
unusedJar.toString)
val appArgs = new SparkSubmitArguments(args, Map("SPARK_CONF_DIR" -> path))
assert(appArgs.propertiesFile != null)
assert(appArgs.propertiesFile.startsWith(path))
appArgs.executorMemory should be ("2.3g")
}
}

// NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
def runSparkSubmit(args: Seq[String]): String = {
val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!"))
@@ -314,6 +330,22 @@ class SparkSubmitSuite extends FunSuite with Matchers {
new File(sparkHome),
Map("SPARK_TESTING" -> "1", "SPARK_HOME" -> sparkHome))
}

def forConfDir(defaults: Map[String, String]) (f: String => Unit) = {
val tmpDir = Files.createTempDir()

val defaultsConf = new File(tmpDir.getAbsolutePath, "spark-defaults.conf")
val writer = new OutputStreamWriter(new FileOutputStream(defaultsConf))
for ((key, value) <- defaults) writer.write(s"$key $value\n")

writer.close()

try {
f(tmpDir.getAbsolutePath)
} finally {
Utils.deleteRecursively(tmpDir)
}
}
}

object JarCreationTest {
7 changes: 7 additions & 0 deletions docs/configuration.md
@@ -1088,3 +1088,10 @@ compute `SPARK_LOCAL_IP` by looking up the IP of a specific network interface.
Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a
`log4j.properties` file in the `conf` directory. One way to start is to copy the existing
`log4j.properties.template` located there.

# Overriding configuration directory

To specify a configuration directory other than the default `SPARK_HOME/conf`, you can set
`SPARK_CONF_DIR`. Spark will then use the configuration files (`spark-defaults.conf`, `spark-env.sh`,
`log4j.properties`, etc.) from that directory.
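
For example, to keep site-specific settings outside the Spark distribution (the directory path, class name, and jar below are placeholders):

```sh
# Create the custom directory and a spark-defaults.conf inside it
mkdir -p /etc/spark/conf
cat > /etc/spark/conf/spark-defaults.conf <<'EOF'
spark.master           local[4]
spark.executor.memory  2g
EOF

# Point Spark at the directory; the launch scripts and spark-submit will read
# spark-defaults.conf, spark-env.sh, log4j.properties, etc. from here
export SPARK_CONF_DIR=/etc/spark/conf
./bin/spark-submit --class com.example.MyApp /path/to/my-app.jar
```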