From 9f287c5a3a2e527b8d2fed45ca3b711e0085f841 Mon Sep 17 00:00:00 2001
From: ehnalis
Date: Tue, 12 May 2015 17:13:05 +0200
Subject: [PATCH 1/6] [SPARK-7504] [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode

Added a simple check to SparkContext. Also added two defensive null checks
to the AM object.
---
 core/src/main/scala/org/apache/spark/SparkContext.scala  | 7 +++++++
 .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 8 +++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index b59f562d05ea..585499482434 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -371,6 +371,13 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       throw new SparkException("An application name must be set in your configuration")
     }
 
+    // Thread name has been set to "Driver" if user code ran by AM on a YARN cluster
+    if (master == "yarn-cluster" &&
+        Thread.currentThread().getName != "Driver") {
+      throw new SparkException("Detected yarn-cluster mode, but isn't running on a cluster. " +
+        "Deployment to YARN is not supported directly by SparkContext. Please use spark-submit.")
+    }
+
     if (_conf.getBoolean("spark.logConf", false)) {
       logInfo("Spark configuration:\n" + _conf.toDebugString)
     }
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 29752969e615..5a4f14bdcd80 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -453,6 +453,7 @@ private[spark] class ApplicationMaster(
   private def startUserApplication(): Thread = {
     logInfo("Starting the user application in a separate Thread")
     System.setProperty("spark.executor.instances", args.numExecutors.toString)
+    // System.setProperty("spark.yarn.am.thread", "yarn-cluster")
 
     val classpath = Client.getUserClasspath(sparkConf)
     val urls = classpath.map { entry =>
@@ -573,13 +574,18 @@ object ApplicationMaster extends Logging {
   }
 
   private[spark] def sparkContextInitialized(sc: SparkContext): Unit = {
+    if (master == null){
+      throw new SparkException("ApplicationMaster is not initialized!")
+    }
     master.sparkContextInitialized(sc)
   }
 
   private[spark] def sparkContextStopped(sc: SparkContext): Boolean = {
+    if (master == null){
+      throw new SparkException("ApplicationMaster is not initialized!")
+    }
     master.sparkContextStopped(sc)
   }
-
 }
 
 /**

From 39e4fa3d1b65cdfc012e2890ebfa07ea3addfda6 Mon Sep 17 00:00:00 2001
From: ehnalis
Date: Tue, 12 May 2015 17:14:26 +0200
Subject: [PATCH 2/6] [SPARK-7504] [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode

Removed unnecessary line.
---
 .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 5a4f14bdcd80..096a49fe9d24 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -453,7 +453,6 @@ private[spark] class ApplicationMaster(
   private def startUserApplication(): Thread = {
     logInfo("Starting the user application in a separate Thread")
     System.setProperty("spark.executor.instances", args.numExecutors.toString)
-    // System.setProperty("spark.yarn.am.thread", "yarn-cluster")
 
     val classpath = Client.getUserClasspath(sparkConf)
     val urls = classpath.map { entry =>

From 4924e01eca339428922e4d9063603579fdc1c9fe Mon Sep 17 00:00:00 2001
From: ehnalis
Date: Tue, 12 May 2015 17:16:00 +0200
Subject: [PATCH 3/6] [SPARK-7504] [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode

Added line-break.
---
 .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 096a49fe9d24..6cb375c122ba 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -585,6 +585,7 @@ object ApplicationMaster extends Logging {
     }
     master.sparkContextStopped(sc)
   }
+
 }
 
 /**

From ea2a5fe2de5cac37ce09b4426268a53d231657cb Mon Sep 17 00:00:00 2001
From: ehnalis
Date: Tue, 12 May 2015 22:07:50 +0200
Subject: [PATCH 4/6] [SPARK-7504] [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode

Removed the null checks from the AM. Refactored the configuration check
into SparkConf.
---
 core/src/main/scala/org/apache/spark/SparkConf.scala     | 9 +++++++++
 core/src/main/scala/org/apache/spark/SparkContext.scala  | 8 +-------
 .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 6 ------
 3 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index a8fc90ad2050..395263ae1d92 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -448,6 +448,15 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
         }
       }
     }
+
+    // System property spark.yarn.app.id must be set if user code ran by AM on a YARN cluster
+    // yarn-standalone is deprecated, but still supported
+    if ((get("spark.master") == "yarn-cluster" || get("spark.master") == "yarn-standalone") &&
+        get("spark.yarn.app.id", null) == null) {
+      throw new SparkException("Detected yarn-cluster mode, but isn't running on a cluster. " +
+        "Deployment to YARN is not supported directly by SparkContext. Please use spark-submit.")
+    }
+
   }
 
   /**
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 585499482434..238815aea63e 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -371,13 +371,6 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       throw new SparkException("An application name must be set in your configuration")
     }
 
-    // Thread name has been set to "Driver" if user code ran by AM on a YARN cluster
-    if (master == "yarn-cluster" &&
-        Thread.currentThread().getName != "Driver") {
-      throw new SparkException("Detected yarn-cluster mode, but isn't running on a cluster. " +
-        "Deployment to YARN is not supported directly by SparkContext. Please use spark-submit.")
-    }
-
     if (_conf.getBoolean("spark.logConf", false)) {
       logInfo("Spark configuration:\n" + _conf.toDebugString)
     }
@@ -1477,6 +1470,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
   def addJar(path: String) {
     if (path == null) {
       logWarning("null specified as parameter to addJar")
+      // yarn-standalone is deprecated, but still supported
     } else {
       var key = ""
       if (path.contains("\\")) {
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 6cb375c122ba..29752969e615 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -573,16 +573,10 @@ object ApplicationMaster extends Logging {
   }
 
   private[spark] def sparkContextInitialized(sc: SparkContext): Unit = {
-    if (master == null){
-      throw new SparkException("ApplicationMaster is not initialized!")
-    }
     master.sparkContextInitialized(sc)
   }
 
   private[spark] def sparkContextStopped(sc: SparkContext): Boolean = {
-    if (master == null){
-      throw new SparkException("ApplicationMaster is not initialized!")
-    }
     master.sparkContextStopped(sc)
   }
 

From 7c89b6e62732b5a7af56aa290164b56b65a9a8b5 Mon Sep 17 00:00:00 2001
From: ehnalis
Date: Tue, 12 May 2015 22:10:17 +0200
Subject: [PATCH 5/6] Remove erroneously added line.

---
 core/src/main/scala/org/apache/spark/SparkContext.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 238815aea63e..b59f562d05ea 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1470,7 +1470,6 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
   def addJar(path: String) {
     if (path == null) {
       logWarning("null specified as parameter to addJar")
-      // yarn-standalone is deprecated, but still supported
     } else {
       var key = ""
       if (path.contains("\\")) {

From 926bd96925603b528b25e739f2b593c815e4de68 Mon Sep 17 00:00:00 2001
From: ehnalis
Date: Tue, 12 May 2015 22:57:47 +0200
Subject: [PATCH 6/6] Moved check to SparkContext.

---
 core/src/main/scala/org/apache/spark/SparkConf.scala    | 8 --------
 core/src/main/scala/org/apache/spark/SparkContext.scala | 8 ++++++++
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index 395263ae1d92..c88cc23c6d60 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -449,14 +449,6 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
       }
     }
 
-    // System property spark.yarn.app.id must be set if user code ran by AM on a YARN cluster
-    // yarn-standalone is deprecated, but still supported
-    if ((get("spark.master") == "yarn-cluster" || get("spark.master") == "yarn-standalone") &&
-        get("spark.yarn.app.id", null) == null) {
-      throw new SparkException("Detected yarn-cluster mode, but isn't running on a cluster. " +
-        "Deployment to YARN is not supported directly by SparkContext. Please use spark-submit.")
-    }
-
   }
 
   /**
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index b59f562d05ea..af276e7b8d40 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -371,6 +371,14 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       throw new SparkException("An application name must be set in your configuration")
     }
 
+    // System property spark.yarn.app.id must be set if user code ran by AM on a YARN cluster
+    // yarn-standalone is deprecated, but still supported
+    if ((master == "yarn-cluster" || master == "yarn-standalone") &&
+        !_conf.contains("spark.yarn.app.id")) {
+      throw new SparkException("Detected yarn-cluster mode, but isn't running on a cluster. " +
+        "Deployment to YARN is not supported directly by SparkContext. Please use spark-submit.")
+    }
+
     if (_conf.getBoolean("spark.logConf", false)) {
       logInfo("Spark configuration:\n" + _conf.toDebugString)
    }
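
The guard that ends up in SparkContext (patch 6/6) can be exercised in isolation. The sketch below is not part of the patch series: it assumes a plain Map in place of SparkConf, IllegalStateException in place of SparkException, and a made-up application id, but the check itself mirrors the logic added above and shows how a directly launched yarn-cluster SparkContext now fails fast instead of hitting a later NullPointerException.

// Minimal standalone sketch of the yarn-cluster guard from patch 6/6.
// Assumptions: Map stands in for SparkConf, IllegalStateException for
// SparkException; the application id value is illustrative only.
object YarnClusterGuardSketch {

  def validate(master: String, conf: Map[String, String]): Unit = {
    // yarn-standalone is deprecated, but still supported
    if ((master == "yarn-cluster" || master == "yarn-standalone") &&
        !conf.contains("spark.yarn.app.id")) {
      throw new IllegalStateException("Detected yarn-cluster mode, but isn't running on a cluster. " +
        "Deployment to YARN is not supported directly by SparkContext. Please use spark-submit.")
    }
  }

  def main(args: Array[String]): Unit = {
    // Under spark-submit the YARN ApplicationMaster sets spark.yarn.app.id, so this passes.
    validate("yarn-cluster", Map("spark.yarn.app.id" -> "application_1431468000000_0001"))

    // Launched directly (no AM), the property is absent and the check fails fast
    // with a clear message instead of a NullPointerException later on.
    try {
      validate("yarn-cluster", Map.empty)
    } catch {
      case e: IllegalStateException => println(s"Rejected as expected: ${e.getMessage}")
    }
  }
}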