From 5f43f453df19c1b281aa0a0e74d06b543a6e2d95 Mon Sep 17 00:00:00 2001 From: WangTao Date: Tue, 2 Dec 2014 23:02:21 +0800 Subject: [PATCH 1/6] System property can override environment variable --- .../org/apache/spark/deploy/yarn/ClientArguments.scala | 4 ++-- .../scheduler/cluster/YarnClientSchedulerBackend.scala | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala index 4d859450efc6..a96aa1c59185 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala @@ -59,12 +59,12 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf) // For backward compatibility, SPARK_YARN_DIST_{ARCHIVES/FILES} should be resolved to hdfs://, // while spark.yarn.dist.{archives/files} should be resolved to file:// (SPARK-2051). files = Option(files) - .orElse(sys.env.get("SPARK_YARN_DIST_FILES")) .orElse(sparkConf.getOption("spark.yarn.dist.files").map(p => Utils.resolveURIs(p))) + .orElse(sys.env.get("SPARK_YARN_DIST_FILES")) .orNull archives = Option(archives) - .orElse(sys.env.get("SPARK_YARN_DIST_ARCHIVES")) .orElse(sparkConf.getOption("spark.yarn.dist.archives").map(p => Utils.resolveURIs(p))) + .orElse(sys.env.get("SPARK_YARN_DIST_ARCHIVES")) .orNull // If dynamic allocation is enabled, start at the max number of executors if (isDynamicAllocationEnabled) { diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 2923e6729cd6..9816f9a7599e 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -79,11 +79,8 @@ private[spark] class YarnClientSchedulerBackend( ("--name", "SPARK_YARN_APP_NAME", "spark.app.name") ) optionTuples.foreach { case (optionName, envVar, sparkProp) => - if (System.getenv(envVar) != null) { - extraArgs += (optionName, System.getenv(envVar)) - } else if (sc.getConf.contains(sparkProp)) { - extraArgs += (optionName, sc.getConf.get(sparkProp)) - } + if (System.getenv(envVar) != null) extraArgs += (optionName, System.getenv(envVar)) + if (sc.getConf.contains(sparkProp)) extraArgs += (optionName, sc.getConf.get(sparkProp)) } extraArgs } From 40934b45e6c3c820321cd721b18844f4b90fda1b Mon Sep 17 00:00:00 2001 From: WangTaoTheTonic Date: Wed, 3 Dec 2014 16:39:52 +0800 Subject: [PATCH 2/6] just switch blocks --- .../scheduler/cluster/YarnClientSchedulerBackend.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 9816f9a7599e..cb9598e74046 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -79,8 +79,11 @@ private[spark] class YarnClientSchedulerBackend( ("--name", "SPARK_YARN_APP_NAME", "spark.app.name") ) optionTuples.foreach { case (optionName, envVar, sparkProp) => - if (System.getenv(envVar) != null) extraArgs += (optionName, System.getenv(envVar)) - if (sc.getConf.contains(sparkProp)) extraArgs += (optionName, sc.getConf.get(sparkProp)) + if (sc.getConf.contains(sparkProp)) { + extraArgs += (optionName, sc.getConf.get(sparkProp)) + } else if (System.getenv(envVar) != null) { + extraArgs += (optionName, System.getenv(envVar)) + } } extraArgs } From bee944746bf8615fbcddfe83f13a988dbc215511 Mon Sep 17 00:00:00 2001 From: WangTaoTheTonic Date: Thu, 8 Jan 2015 19:52:57 +0800 Subject: [PATCH 3/6] wrong brace --- .../scheduler/cluster/YarnClientSchedulerBackend.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 3841e268b360..02df3378b3a2 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -92,11 +92,11 @@ private[spark] class YarnClientSchedulerBackend( extraArgs += (optionName, sc.getConf.get(sparkProp)) if (deprecatedProps.contains(sparkProp)) { logWarning(s"NOTE: $sparkProp is deprecated. Use ${deprecatedProps(sparkProp)} instead.") - } else if (System.getenv(envVar) != null) { - extraArgs += (optionName, System.getenv(envVar)) - if (deprecatedEnvVars.contains(envVar)) { - logWarning(s"NOTE: $envVar is deprecated. Use ${deprecatedEnvVars(envVar)} instead.") - } + } + } else if (System.getenv(envVar) != null) { + extraArgs += (optionName, System.getenv(envVar)) + if (deprecatedEnvVars.contains(envVar)) { + logWarning(s"NOTE: $envVar is deprecated. Use ${deprecatedEnvVars(envVar)} instead.") } } } From 1262d571d8a431066b584d4e6a1212ed642b7713 Mon Sep 17 00:00:00 2001 From: WangTaoTheTonic Date: Mon, 12 Jan 2015 12:03:15 +0800 Subject: [PATCH 4/6] handle spark.app.name and SPARK_YARN_APP_NAME in SparkSubmitArguments --- .../scala/org/apache/spark/deploy/SparkSubmitArguments.scala | 4 ++++ .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 1faabe91f49a..09b8be5f6ec3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -149,6 +149,10 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St // Global defaults. These should be keep to minimum to avoid confusing behavior. master = Option(master).getOrElse("local[*]") + // In yarn mode, app name can be set via SPARK_YARN_APP_NAME + if (master.contains("yarn")) { + name = Option(name).orElse(env.get("SPARK_YARN_APP_NAME")) + } // Set name from main class if not given name = Option(name).orElse(Option(mainClass)).orNull if (name == null && primaryResource != null) { diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 02df3378b3a2..8ea3f552a48b 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -77,7 +77,6 @@ private[spark] class YarnClientSchedulerBackend( ("--executor-cores", "SPARK_WORKER_CORES", "spark.executor.cores"), ("--executor-cores", "SPARK_EXECUTOR_CORES", "spark.executor.cores"), ("--queue", "SPARK_YARN_QUEUE", "spark.yarn.queue"), - ("--name", "SPARK_YARN_APP_NAME", "spark.app.name") ) // Warn against the following deprecated environment variables: env var -> suggestion val deprecatedEnvVars = Map( @@ -100,6 +99,7 @@ private[spark] class YarnClientSchedulerBackend( } } } + sc.getConf.getOption("spark.app.name").foreach(v => extraArgs += ("--name", v)) extraArgs } From e3e486ad0bb8f06cca1cad1ceba3eb8c88180052 Mon Sep 17 00:00:00 2001 From: WangTaoTheTonic Date: Mon, 12 Jan 2015 12:08:37 +0800 Subject: [PATCH 5/6] remove the comma --- .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 8ea3f552a48b..6a9fae6e6a48 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -76,7 +76,7 @@ private[spark] class YarnClientSchedulerBackend( ("--executor-memory", "SPARK_EXECUTOR_MEMORY", "spark.executor.memory"), ("--executor-cores", "SPARK_WORKER_CORES", "spark.executor.cores"), ("--executor-cores", "SPARK_EXECUTOR_CORES", "spark.executor.cores"), - ("--queue", "SPARK_YARN_QUEUE", "spark.yarn.queue"), + ("--queue", "SPARK_YARN_QUEUE", "spark.yarn.queue") ) // Warn against the following deprecated environment variables: env var -> suggestion val deprecatedEnvVars = Map( From 836b9ef13ef442109ba978da23309f7679405e2f Mon Sep 17 00:00:00 2001 From: WangTaoTheTonic Date: Mon, 12 Jan 2015 12:13:35 +0800 Subject: [PATCH 6/6] fix type mismatch --- .../scala/org/apache/spark/deploy/SparkSubmitArguments.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 09b8be5f6ec3..da7faaa07281 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -151,7 +151,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St // In yarn mode, app name can be set via SPARK_YARN_APP_NAME if (master.contains("yarn")) { - name = Option(name).orElse(env.get("SPARK_YARN_APP_NAME")) + name = Option(name).orElse(env.get("SPARK_YARN_APP_NAME")).orNull } // Set name from main class if not given name = Option(name).orElse(Option(mainClass)).orNull