7 changes: 7 additions & 0 deletions bin/compute-classpath.cmd
@@ -37,6 +37,13 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

rem Build up classpath
set CLASSPATH=%FWDIR%conf

if "x%SPARK_CONF_DIR%"!="x" (
rem If SPARK_CONF_DIR is defined give it preference over default conf in spark home

set CLASSPATH=%SPARK_CONF_DIR%;%CLASSPATH%
)

if exist "%FWDIR%RELEASE" (
for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
set ASSEMBLY_JAR=%%d
5 changes: 5 additions & 0 deletions bin/compute-classpath.sh
@@ -30,6 +30,11 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
# Build up classpath
CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"

# If SPARK_CONF_DIR is defined give it preference over default conf in spark home
if [ -n "$SPARK_CONF_DIR" ]; then
CLASSPATH="$SPARK_CONF_DIR:$CLASSPATH"
Contributor:

Does the configuration dir need to be added to the classpath? The code, at least the part you're modifying below, doesn't seem to require that.

Contributor Author:

Indeed, it's not used in SparkSubmit itself, but this is done in order to provide the user-defined config to other components such as logging, the scheduler, and metrics.
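
For context on that point: components like log4j and the metrics system locate their configuration as classpath resources, so a conf directory is only visible to them when it sits on the classpath. A minimal, hypothetical Scala sketch of such a lookup (not Spark code; the object name is made up):

```scala
// Hypothetical standalone check (not part of Spark): print where on the
// classpath each well-known config resource would be resolved from.
object ClasspathConfigCheck {
  def main(args: Array[String]): Unit = {
    val names = Seq("log4j.properties", "metrics.properties", "fairscheduler.xml")
    names.foreach { name =>
      // Components such as log4j resolve these via the classloader, which is
      // why the conf directory has to be on the classpath to take effect.
      val url = Option(Thread.currentThread().getContextClassLoader.getResource(name))
      println(s"$name -> ${url.getOrElse("not found on classpath")}")
    }
  }
}
```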

Contributor:

Also, I think that in standalone mode the worker needs to use the config, so the classpath is useful.

fi

ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"

if [ -n "$JAVA_HOME" ]; then
@@ -47,7 +47,7 @@ object SparkSubmit {
private val PYSPARK_SHELL = "pyspark-shell"

def main(args: Array[String]) {
val appArgs = new SparkSubmitArguments(args)
val appArgs = new SparkSubmitArguments(args, sys.env)
Contributor:

No need to pass sys.env to SparkSubmitArguments. You can just get it from there directly since it's the same JVM.

if (appArgs.verbose) {
printStream.println(appArgs)
}
@@ -30,7 +30,7 @@ import org.apache.spark.util.Utils
/**
* Parses and encapsulates arguments from the spark-submit script.
*/
private[spark] class SparkSubmitArguments(args: Seq[String]) {
private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, String] = sys.env) {
Contributor:

No need to accept an extra argument here. This will always read from the JVM directly anyway.

Contributor:

I see, you use this for tests. Then it makes sense to add a comment here to explain that.
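
A hedged sketch of what such an explanatory comment could look like on the constructor from this diff (the wording is illustrative, not the committed code):

```scala
package org.apache.spark.deploy

/**
 * Parses and encapsulates arguments from the spark-submit script.
 * The `env` parameter exists only so tests can inject a fake environment
 * (e.g. a map containing SPARK_CONF_DIR); production callers should rely
 * on the default, which reads the real environment via sys.env.
 */
private[spark] class SparkSubmitArguments(
    args: Seq[String],
    env: Map[String, String] = sys.env) {
  // ... argument parsing as in the rest of this file ...
}
```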

var master: String = null
var deployMode: String = null
var executorMemory: String = null
@@ -83,9 +83,12 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {

// Use common defaults file, if not specified by user
if (propertiesFile == null) {
sys.env.get("SPARK_HOME").foreach { sparkHome =>
val sep = File.separator
val defaultPath = s"${sparkHome}${sep}conf${sep}spark-defaults.conf"
val sep = File.separator
val sparkHomeConfig = env.get("SPARK_HOME").map(sparkHome => s"${sparkHome}${sep}conf")

// give preference to user defined conf over the one in spark home
env.get("SPARK_CONF_DIR").orElse(sparkHomeConfig).foreach { configPath =>
val defaultPath = s"${configPath}${sep}spark-defaults.conf"
val file = new File(defaultPath)
if (file.exists()) {
propertiesFile = file.getAbsolutePath
@@ -161,7 +164,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
}

if (master.startsWith("yarn")) {
val hasHadoopEnv = sys.env.contains("HADOOP_CONF_DIR") || sys.env.contains("YARN_CONF_DIR")
val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR")
if (!hasHadoopEnv && !Utils.isTesting) {
throw new Exception(s"When running with master '$master' " +
"either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.")
@@ -17,7 +17,7 @@

package org.apache.spark.deploy

import java.io.{File, OutputStream, PrintStream}
import java.io._

import scala.collection.mutable.ArrayBuffer

@@ -26,6 +26,7 @@ import org.apache.spark.deploy.SparkSubmit._
import org.apache.spark.util.Utils
import org.scalatest.FunSuite
import org.scalatest.matchers.ShouldMatchers
import com.google.common.io.Files

class SparkSubmitSuite extends FunSuite with ShouldMatchers {
def beforeAll() {
@@ -264,6 +265,21 @@ class SparkSubmitSuite extends FunSuite with ShouldMatchers {
runSparkSubmit(args)
}

test("SPARK_CONF_DIR overrides spark-defaults.conf") {
forConfDir(Map("spark.executor.memory" -> "2.3g")) { path =>
val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val args = Seq(
"--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"),
"--name", "testApp",
"--master", "local",
unusedJar.toString)
val appArgs = new SparkSubmitArguments(args, Map("SPARK_CONF_DIR" -> path))
assert(appArgs.propertiesFile != null)
assert(appArgs.propertiesFile.startsWith(path))
appArgs.executorMemory should be ("2.3g")
}
}

// NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
def runSparkSubmit(args: Seq[String]): String = {
val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.props.get("spark.home")).get
@@ -272,6 +288,22 @@ class SparkSubmitSuite extends FunSuite with ShouldMatchers {
new File(sparkHome),
Map("SPARK_TESTING" -> "1", "SPARK_HOME" -> sparkHome))
}

def forConfDir(defaults: Map[String, String]) (f: String => Unit) = {
val tmpDir = Files.createTempDir()

val defaultsConf = new File(tmpDir.getAbsolutePath, "spark-defaults.conf")
val writer = new OutputStreamWriter(new FileOutputStream(defaultsConf))
for ((key, value) <- defaults) writer.write(s"$key $value\n")

writer.close()

try {
f(tmpDir.getAbsolutePath)
} finally {
Utils.deleteRecursively(tmpDir)
}
}
}

object JarCreationTest {
13 changes: 13 additions & 0 deletions docs/configuration.md
@@ -839,3 +839,16 @@ compute `SPARK_LOCAL_IP` by looking up the IP of a specific network interface.
Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a
`log4j.properties` file in the `conf` directory. One way to start is to copy the existing
`log4j.properties.template` located there.

# Overriding configuration
Contributor:

How about "Overriding configuration directory"?


In some cases you might want to provide all configuration from a place other than the default SPARK_HOME/conf dir.
For example if you are using the prepackaged version of Spark or if you are building it your self but want to be
Contributor:

yourself*

independent from your cluster configuration (managed by an automation tool).

In that scenario you can define the SPARK_CONF_DIR variable to point to an alternate directory containing your configuration.
Spark will then use it for the following configurations:

* spark-defaults.conf and spark-env.sh will be loaded only from SPARK_CONF_DIR
* log4j.properties, fairscheduler.xml and metrics.properties will be loaded from SPARK_CONF_DIR if present; if missing, the ones from SPARK_HOME/conf will be used.
Contributor:

This whole paragraph is a little verbose. I think it's sufficient to say something like

To specify a different configuration directory other than the default "SPARK_HOME/conf", you can set SPARK_CONF_DIR. Spark will look for the following configuration files in this directory:
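
As an illustrative footnote to the documentation discussion above, here is a minimal, hypothetical Scala sketch of the precedence being described (the object and method names are made up; the PR's actual lookup lives in SparkSubmitArguments):

```scala
import java.io.File

// Hypothetical helper: resolve the effective conf directory with the
// precedence described in the docs — SPARK_CONF_DIR wins when set,
// otherwise fall back to SPARK_HOME/conf.
object ConfDirExample {
  def effectiveConfDir(env: Map[String, String] = sys.env): Option[File] =
    env.get("SPARK_CONF_DIR")
      .orElse(env.get("SPARK_HOME").map(home => home + File.separator + "conf"))
      .map(new File(_))

  def main(args: Array[String]): Unit = {
    // For example, running with SPARK_CONF_DIR=/etc/spark/conf set prints
    // Some(/etc/spark/conf); with neither variable set it prints None.
    println(effectiveConfDir())
  }
}
```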
...