From ace2fbc5e66a6f10e3f0bde939958c178d22c7eb Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Wed, 31 May 2017 22:17:05 -0700
Subject: [PATCH 1/2] simplify the config setting logic in SparkSession.getOrCreate

---
 .../apache/spark/ml/tree/impl/TreeTests.scala |  1 -
 .../org/apache/spark/sql/SparkSession.scala   | 23 ++++++-------------
 2 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/TreeTests.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/TreeTests.scala
index 92a236928e90..b93262ae3c5f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/TreeTests.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/TreeTests.scala
@@ -43,7 +43,6 @@ private[ml] object TreeTests extends SparkFunSuite {
       categoricalFeatures: Map[Int, Int],
       numClasses: Int): DataFrame = {
     val spark = SparkSession.builder()
-      .master("local[2]")
       .appName("TreeTests")
       .sparkContext(data.sparkContext)
       .getOrCreate()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index d2bf35071193..f949f8f47085 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -854,7 +854,7 @@ object SparkSession {
      *
      * @since 2.2.0
      */
-    def withExtensions(f: SparkSessionExtensions => Unit): Builder = {
+    def withExtensions(f: SparkSessionExtensions => Unit): Builder = synchronized {
       f(extensions)
       this
     }
@@ -899,22 +899,14 @@ object SparkSession {
 
         // No active nor global default session. Create a new one.
         val sparkContext = userSuppliedContext.getOrElse {
-          // set app name if not given
-          val randomAppName = java.util.UUID.randomUUID().toString
           val sparkConf = new SparkConf()
-          options.foreach { case (k, v) => sparkConf.set(k, v) }
-          if (!sparkConf.contains("spark.app.name")) {
-            sparkConf.setAppName(randomAppName)
-          }
-          val sc = SparkContext.getOrCreate(sparkConf)
-          // maybe this is an existing SparkContext, update its SparkConf which maybe used
-          // by SparkSession
-          options.foreach { case (k, v) => sc.conf.set(k, v) }
-          if (!sc.conf.contains("spark.app.name")) {
-            sc.conf.setAppName(randomAppName)
-          }
-          sc
+          options.get("spark.master").foreach(sparkConf.setMaster)
+          // set a random app name if not given.
+          sparkConf.setAppName(options.getOrElse("spark.app.name",
+            java.util.UUID.randomUUID().toString))
+          SparkContext.getOrCreate(sparkConf)
         }
+        options.foreach { case (k, v) => sparkContext.conf.set(k, v) }
 
         // Initialize extensions if the user has defined a configurator class.
         val extensionConfOption = sparkContext.conf.get(StaticSQLConf.SPARK_SESSION_EXTENSIONS)
@@ -935,7 +927,6 @@ object SparkSession {
         }
 
         session = new SparkSession(sparkContext, None, None, extensions)
-        options.foreach { case (k, v) => session.sessionState.conf.setConfString(k, v) }
         defaultSession.set(session)
 
         // Register a successfully instantiated context to the singleton. This should be at the
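
A minimal usage sketch of what patch 1 consolidates, not part of the patch itself: builder options are now copied into the underlying SparkContext's conf in exactly one place (the single options.foreach above), whether the context is freshly created or reused. The app name, master, and the "spark.ui.enabled" key are illustrative choices.

import org.apache.spark.sql.SparkSession

object GetOrCreateExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")                  // stored as the "spark.master" option
      .appName("GetOrCreateExample")       // stored as "spark.app.name"; a random UUID is used if omitted
      .config("spark.ui.enabled", "false") // copied into sparkContext.conf by the single options.foreach
      .getOrCreate()

    // getConf returns a copy of the context's SparkConf; the builder option is visible there.
    assert(spark.sparkContext.getConf.get("spark.ui.enabled") == "false")
    spark.stop()
  }
}

The net effect of the rewrite is that options are applied to the context's conf once, instead of once on a fresh SparkConf and again on a possibly pre-existing context, and the separate copy into session.sessionState.conf is dropped.
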
From a8fe8b083d96b59ff209465694edabd333b73402 Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Thu, 1 Jun 2017 20:41:42 -0700
Subject: [PATCH 2/2] address comments

---
 .../org/apache/spark/ml/recommendation/ALSSuite.scala        | 4 +---
 .../test/scala/org/apache/spark/ml/tree/impl/TreeTests.scala | 1 -
 .../src/main/scala/org/apache/spark/sql/SparkSession.scala   | 2 ++
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
index 9d31e792633c..55eb4db5de05 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
@@ -772,15 +772,13 @@ class ALSCleanerSuite extends SparkFunSuite {
       FileUtils.listFiles(localDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet
     try {
       conf.set("spark.local.dir", localDir.getAbsolutePath)
-      val sc = new SparkContext("local[2]", "test", conf)
+      val sc = new SparkContext("local[2]", "ALSCleanerSuite", conf)
       try {
         sc.setCheckpointDir(checkpointDir.getAbsolutePath)
         // Generate test data
         val (training, _) = ALSSuite.genImplicitTestData(sc, 20, 5, 1, 0.2, 0)
         // Implicitly test the cleaning of parents during ALS training
         val spark = SparkSession.builder
-          .master("local[2]")
-          .appName("ALSCleanerSuite")
           .sparkContext(sc)
           .getOrCreate()
         import spark.implicits._
diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/TreeTests.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/TreeTests.scala
index b93262ae3c5f..b6894b30b0c2 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/TreeTests.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/TreeTests.scala
@@ -43,7 +43,6 @@ private[ml] object TreeTests extends SparkFunSuite {
       categoricalFeatures: Map[Int, Int],
       numClasses: Int): DataFrame = {
     val spark = SparkSession.builder()
-      .appName("TreeTests")
       .sparkContext(data.sparkContext)
       .getOrCreate()
     import spark.implicits._
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index f949f8f47085..bf37b7608747 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -757,6 +757,8 @@ object SparkSession {
 
     private[this] var userSuppliedContext: Option[SparkContext] = None
 
+    // The `SparkConf` inside the given `SparkContext` may get changed if you specify some options
+    // for this builder.
+
     private[spark] def sparkContext(sparkContext: SparkContext): Builder = synchronized {
       userSuppliedContext = Option(sparkContext)
       this
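
The test changes in patch 2 drop master and appName from builders that already receive a SparkContext, since such options would simply be written into the supplied context's conf. A hedged sketch of that caveat follows; the package and object names are hypothetical, and the package exists only so the private[spark] sparkContext(...) builder method is visible:

package org.apache.spark.example

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

object SuppliedContextSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("SuppliedContextSketch"))

    val spark = SparkSession.builder()
      .config("spark.sql.shuffle.partitions", "4") // written into sc's own SparkConf
      .sparkContext(sc)                            // context supplied, so no master/appName needed
      .getOrCreate()

    // The pre-existing context's conf was mutated, which is what the added comment warns about.
    assert(sc.getConf.get("spark.sql.shuffle.partitions") == "4")
    spark.stop()
  }
}
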