diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 58710cd1bd54..c2505ff61794 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -37,6 +37,13 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 
 rem Build up classpath
 set CLASSPATH=%FWDIR%conf
+
+if not "x%SPARK_CONF_DIR%"=="x" (
+  rem If SPARK_CONF_DIR is defined, give it preference over the default conf in the Spark home
+
+  set CLASSPATH=%SPARK_CONF_DIR%;%CLASSPATH%
+)
+
 if exist "%FWDIR%RELEASE" (
   for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
     set ASSEMBLY_JAR=%%d
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 7df43a555d56..2916e51eca08 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -30,6 +30,11 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
 
+# If SPARK_CONF_DIR is defined, give it preference over the default conf in the Spark home
+if [ -n "$SPARK_CONF_DIR" ]; then
+  CLASSPATH="$SPARK_CONF_DIR:$CLASSPATH"
+fi
+
 ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"
 
 if [ -n "$JAVA_HOME" ]; then
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 7e9a9344e61f..52975425e074 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -47,7 +47,7 @@ object SparkSubmit {
   private val PYSPARK_SHELL = "pyspark-shell"
 
   def main(args: Array[String]) {
-    val appArgs = new SparkSubmitArguments(args)
+    val appArgs = new SparkSubmitArguments(args, sys.env)
     if (appArgs.verbose) {
       printStream.println(appArgs)
     }
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 153eee3bc588..55ed8eab125e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -30,7 +30,7 @@ import org.apache.spark.util.Utils
 /**
  * Parses and encapsulates arguments from the spark-submit script.
  */
-private[spark] class SparkSubmitArguments(args: Seq[String]) {
+private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, String] = sys.env) {
   var master: String = null
   var deployMode: String = null
   var executorMemory: String = null
@@ -83,9 +83,12 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
 
     // Use common defaults file, if not specified by user
     if (propertiesFile == null) {
-      sys.env.get("SPARK_HOME").foreach { sparkHome =>
-        val sep = File.separator
-        val defaultPath = s"${sparkHome}${sep}conf${sep}spark-defaults.conf"
+      val sep = File.separator
+      val sparkHomeConfig = env.get("SPARK_HOME").map(sparkHome => s"${sparkHome}${sep}conf")
+
+      // give preference to user defined conf over the one in spark home
+      env.get("SPARK_CONF_DIR").orElse(sparkHomeConfig).foreach { configPath =>
+        val defaultPath = s"${configPath}${sep}spark-defaults.conf"
         val file = new File(defaultPath)
         if (file.exists()) {
           propertiesFile = file.getAbsolutePath
@@ -161,7 +164,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
     }
 
     if (master.startsWith("yarn")) {
-      val hasHadoopEnv = sys.env.contains("HADOOP_CONF_DIR") || sys.env.contains("YARN_CONF_DIR")
+      val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR")
       if (!hasHadoopEnv && !Utils.isTesting) {
         throw new Exception(s"When running with master '$master' " +
           "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.")
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 02427a4a8350..17a0019d4e37 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.deploy
 
-import java.io.{File, OutputStream, PrintStream}
+import java.io._
 
 import scala.collection.mutable.ArrayBuffer
 
@@ -26,6 +26,7 @@ import org.apache.spark.deploy.SparkSubmit._
 import org.apache.spark.util.Utils
 import org.scalatest.FunSuite
 import org.scalatest.matchers.ShouldMatchers
+import com.google.common.io.Files
 
 class SparkSubmitSuite extends FunSuite with ShouldMatchers {
   def beforeAll() {
@@ -264,6 +265,21 @@ class SparkSubmitSuite extends FunSuite with ShouldMatchers {
     runSparkSubmit(args)
   }
 
+  test("SPARK_CONF_DIR overrides spark-defaults.conf") {
+    forConfDir(Map("spark.executor.memory" -> "2.3g")) { path =>
+      val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
+      val args = Seq(
+        "--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"),
+        "--name", "testApp",
+        "--master", "local",
+        unusedJar.toString)
+      val appArgs = new SparkSubmitArguments(args, Map("SPARK_CONF_DIR" -> path))
+      assert(appArgs.propertiesFile != null)
+      assert(appArgs.propertiesFile.startsWith(path))
+      appArgs.executorMemory should be ("2.3g")
+    }
+  }
+
   // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
   def runSparkSubmit(args: Seq[String]): String = {
     val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.props.get("spark.home")).get
@@ -272,6 +288,22 @@ class SparkSubmitSuite extends FunSuite with ShouldMatchers {
       new File(sparkHome),
       Map("SPARK_TESTING" -> "1", "SPARK_HOME" -> sparkHome))
   }
+
+  def forConfDir(defaults: Map[String, String])(f: String => Unit) = {
+    val tmpDir = Files.createTempDir()
+
+    val defaultsConf = new File(tmpDir.getAbsolutePath, "spark-defaults.conf")
+    val writer = new OutputStreamWriter(new FileOutputStream(defaultsConf))
+    for ((key, value) <- defaults) writer.write(s"$key $value\n")
+
+    writer.close()
+
+    try {
+      f(tmpDir.getAbsolutePath)
+    } finally {
+      Utils.deleteRecursively(tmpDir)
+    }
+  }
 }
 
 object JarCreationTest {
diff --git a/docs/configuration.md b/docs/configuration.md
index 71fafa573467..527d7b9ce9e0 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -839,3 +839,16 @@ compute `SPARK_LOCAL_IP` by looking up the IP of a specific network interface.
 Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a
 `log4j.properties` file in the `conf` directory. One way to start is to copy the existing
 `log4j.properties.template` located there.
+
+# Overriding configuration directory
+
+In some cases you might want to load all of your configuration from somewhere other than the default
+`SPARK_HOME/conf` directory, for example if you are using a prepackaged build of Spark, or if you build it
+yourself but want to keep it independent of your cluster configuration (managed by an automation tool).
+
+In that scenario, set the `SPARK_CONF_DIR` environment variable to an alternate directory containing your
+configuration. Spark will then use it as follows:
+
+ * `spark-defaults.conf` and `spark-env.sh` will be loaded only from `SPARK_CONF_DIR`.
+ * `log4j.properties`, `fairscheduler.xml` and `metrics.properties` will be loaded from `SPARK_CONF_DIR` if
+   present there; otherwise the copies in `SPARK_HOME/conf` will be used.
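To make the new lookup order easier to review, here is a small standalone sketch of the precedence that `SparkSubmitArguments` now applies when locating `spark-defaults.conf`. It is not part of the patch, and the `DefaultsResolution`/`resolveDefaultsFile` names are illustrative only; the point is that `SPARK_CONF_DIR`, when set, wins over `SPARK_HOME/conf`:

```scala
import java.io.File

// Illustrative sketch only: mirrors the precedence introduced in SparkSubmitArguments.
object DefaultsResolution {
  // Returns the spark-defaults.conf that would be picked up, if any.
  def resolveDefaultsFile(env: Map[String, String]): Option[String] = {
    val sep = File.separator
    val sparkHomeConf = env.get("SPARK_HOME").map(home => s"$home${sep}conf")
    env.get("SPARK_CONF_DIR").orElse(sparkHomeConf)            // user-defined conf dir wins
      .map(dir => new File(s"$dir${sep}spark-defaults.conf"))  // then look for spark-defaults.conf in it
      .filter(_.exists())
      .map(_.getAbsolutePath)
  }

  def main(args: Array[String]): Unit = {
    // With both variables set, the file under SPARK_CONF_DIR is preferred.
    println(resolveDefaultsFile(sys.env))
  }
}
```

Run with `SPARK_CONF_DIR` pointing at a directory that contains a `spark-defaults.conf`, this prints that file's absolute path, which is the behaviour the new `forConfDir`-based test asserts via `appArgs.propertiesFile`.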