[SPARK-20946][SQL] Do not update conf for existing SparkContext in SparkSession.getOrCreate #18536
```diff
@@ -818,15 +818,13 @@ class ALSCleanerSuite extends SparkFunSuite {
       FileUtils.listFiles(localDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet
     try {
       conf.set("spark.local.dir", localDir.getAbsolutePath)
-      val sc = new SparkContext("local[2]", "test", conf)
+      val sc = new SparkContext("local[2]", "ALSCleanerSuite", conf)
       try {
         sc.setCheckpointDir(checkpointDir.getAbsolutePath)
         // Generate test data
         val (training, _) = ALSSuite.genImplicitTestData(sc, 20, 5, 1, 0.2, 0)
         // Implicitly test the cleaning of parents during ALS training
         val spark = SparkSession.builder
-          .master("local[2]")
-          .appName("ALSCleanerSuite")
           .sparkContext(sc)
           .getOrCreate()
         import spark.implicits._
```

Member: The same to …
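The removal of `.master(...)` and `.appName(...)` follows from how `getOrCreate` treats a pre-built context. A minimal sketch of that behavior (not code from the PR; statements as they would appear inside, say, a test body):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

// The context created up front already fixes the master and the app name.
val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("ALSCleanerSuite"))

// getOrCreate() reuses the running context, so builder-level .master(...)
// and .appName(...) calls would have no effect here.
val spark = SparkSession.builder.getOrCreate()
assert(spark.sparkContext eq sc) // the existing context is reused as-is
```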
```diff
@@ -867,7 +867,7 @@ object SparkSession {
    *
    * @since 2.2.0
    */
-  def withExtensions(f: SparkSessionExtensions => Unit): Builder = {
+  def withExtensions(f: SparkSessionExtensions => Unit): Builder = synchronized {
     f(extensions)
     this
   }
```
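For context, a usage sketch of `withExtensions` (assuming the `SparkSessionExtensions` API introduced in Spark 2.2; the injected rule is a hypothetical no-op). The change above only makes the method `synchronized`, matching the builder's other mutators:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

val spark = SparkSession.builder
  .master("local[2]")
  .appName("extensions-demo") // hypothetical app name
  .withExtensions { ext =>
    // Inject a no-op resolution rule, purely to show the hook point.
    ext.injectResolutionRule { _ =>
      new Rule[LogicalPlan] {
        override def apply(plan: LogicalPlan): LogicalPlan = plan
      }
    }
  }
  .getOrCreate()
```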
```diff
@@ -912,21 +912,16 @@ object SparkSession {

         // No active nor global default session. Create a new one.
         val sparkContext = userSuppliedContext.getOrElse {
-          // set app name if not given
-          val randomAppName = java.util.UUID.randomUUID().toString
           val sparkConf = new SparkConf()
           options.foreach { case (k, v) => sparkConf.set(k, v) }
+
+          // set a random app name if not given.
           if (!sparkConf.contains("spark.app.name")) {
-            sparkConf.setAppName(randomAppName)
+            sparkConf.setAppName(java.util.UUID.randomUUID().toString)
           }
-          val sc = SparkContext.getOrCreate(sparkConf)
-          // maybe this is an existing SparkContext, update its SparkConf which maybe used
-          // by SparkSession
-          options.foreach { case (k, v) => sc.conf.set(k, v) }
-          if (!sc.conf.contains("spark.app.name")) {
-            sc.conf.setAppName(randomAppName)
-          }
-          sc
+
+          SparkContext.getOrCreate(sparkConf)
+          // Do not update `SparkConf` for existing `SparkContext`, as it's shared by all sessions.
         }

         // Initialize extensions if the user has defined a configurator class.
```

Member: Then we may need to update …

Contributor (author): I think that document is for normal cases; we won't set conf for an existing `SparkContext`.
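The net effect (a hedged sketch, not code from the PR): builder options still reach the new session's runtime conf, but the `SparkConf` of an already-running `SparkContext` is no longer mutated.

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("shared"))

val spark = SparkSession.builder
  .config("spark.sql.shuffle.partitions", "4")
  .getOrCreate()

// The option is visible at the session level...
assert(spark.conf.get("spark.sql.shuffle.partitions") == "4")
// ...but the shared context's conf is left untouched after this change.
assert(!sc.getConf.contains("spark.sql.shuffle.partitions"))
```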
```diff
@@ -102,11 +102,9 @@ class SparkSessionBuilderSuite extends SparkFunSuite {
     assert(session.conf.get("key1") == "value1")
     assert(session.conf.get("key2") == "value2")
     assert(session.sparkContext == sparkContext2)
-    assert(session.sparkContext.conf.get("key1") == "value1")
-    // If the created sparkContext is not passed through the Builder's API sparkContext,
-    // the conf of this sparkContext will also contain the conf set through the API config.
-    assert(session.sparkContext.conf.get("key2") == "value2")
-    assert(session.sparkContext.conf.get("spark.app.name") == "test")
+    // We won't update conf for existing `SparkContext`
+    assert(!sparkContext2.conf.contains("key2"))
+    assert(sparkContext2.conf.get("key1") == "value1")
     session.stop()
   }
```

Member: This PR makes the behaviors consistent. To build a SparkSession, we behave consistently no matter whether the spark context is implicitly chosen or explicitly passed by users. I think we need to fix it.
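The consistency being discussed can be sketched outside Spark's own test suite like this (hypothetical snippet; `Builder.sparkContext` is `private[spark]`, so here the running context is picked up implicitly instead of being passed explicitly):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

val sparkContext2 = new SparkContext(
  new SparkConf().setMaster("local").setAppName("test").set("key1", "value1"))

// Implicit reuse behaves the same as the explicit .sparkContext(...) path in the test.
val session = SparkSession.builder.config("key2", "value2").getOrCreate()

assert(session.conf.get("key2") == "value2")          // applied to the session
assert(!sparkContext2.getConf.contains("key2"))       // not written back to the context
assert(sparkContext2.getConf.get("key1") == "value1") // pre-existing conf intact
```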
Reviewer: If users set it to `local[4]`, which is different from the pre-built spark context, I think we need to issue an exception? If they are identical, maybe we can simply ignore it? cc @rxin

Author: The current behavior is to ignore these configs and give a warning log: "Using an existing SparkContext; some configuration may not take effect." This PR doesn't change that behavior.
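A sketch of that unchanged warning path (assumption: a fresh application with no prior context or session):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("first"))

// The conflicting master is ignored rather than rejected; Spark logs:
// "Using an existing SparkContext; some configuration may not take effect."
val spark = SparkSession.builder.master("local[4]").getOrCreate()

assert(spark.sparkContext eq sc)
assert(spark.sparkContext.master == "local[2]") // the pre-built context's master wins
```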