-
Notifications
You must be signed in to change notification settings - Fork 29.3k
[SPARK-25525][SQL][PYSPARK] Do not update conf for existing SparkContext in SparkSession.getOrCreate. #22545
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -83,6 +83,7 @@ class Builder(object): | |
|
|
||
| _lock = RLock() | ||
| _options = {} | ||
| _sc = None | ||
|
|
||
| @since(2.0) | ||
| def config(self, key=None, value=None, conf=None): | ||
|
|
@@ -139,6 +140,10 @@ def enableHiveSupport(self): | |
| """ | ||
| return self.config("spark.sql.catalogImplementation", "hive") | ||
|
|
||
| def _sparkContext(self, sc): | ||
| self._sc = sc | ||
| return self | ||
|
|
||
| @since(2.0) | ||
| def getOrCreate(self): | ||
| """Gets an existing :class:`SparkSession` or, if there is no existing one, creates a | ||
|
|
@@ -150,7 +155,7 @@ def getOrCreate(self): | |
| default. | ||
|
|
||
| >>> s1 = SparkSession.builder.config("k1", "v1").getOrCreate() | ||
| >>> s1.conf.get("k1") == s1.sparkContext.getConf().get("k1") == "v1" | ||
| >>> s1.conf.get("k1") == "v1" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ueshin Could we also update the migration guide about this change?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In that case, we might have to put the behaviour changes by #18536 together to the migration guide as well.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can do it together.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Submitted a pr to update the migration guide #22682. |
||
| True | ||
|
|
||
| In case an existing SparkSession is returned, the config options specified | ||
|
|
@@ -167,22 +172,19 @@ def getOrCreate(self): | |
| from pyspark.conf import SparkConf | ||
| session = SparkSession._instantiatedSession | ||
| if session is None or session._sc._jsc is None: | ||
| sparkConf = SparkConf() | ||
| for key, value in self._options.items(): | ||
| sparkConf.set(key, value) | ||
| sc = SparkContext.getOrCreate(sparkConf) | ||
| # This SparkContext may be an existing one. | ||
| for key, value in self._options.items(): | ||
| # we need to propagate the confs | ||
| # before we create the SparkSession. Otherwise, confs like | ||
| # warehouse path and metastore url will not be set correctly ( | ||
| # these confs cannot be changed once the SparkSession is created). | ||
| sc._conf.set(key, value) | ||
| if self._sc is not None: | ||
| sc = self._sc | ||
| else: | ||
| sparkConf = SparkConf() | ||
| for key, value in self._options.items(): | ||
| sparkConf.set(key, value) | ||
| sc = SparkContext.getOrCreate(sparkConf) | ||
| # This SparkContext may be an existing one. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tiny nit: can we move this comment above |
||
| # Do not update `SparkConf` for existing `SparkContext`, as it's shared | ||
| # by all sessions. | ||
| session = SparkSession(sc) | ||
| for key, value in self._options.items(): | ||
| session._jsparkSession.sessionState().conf().setConfString(key, value) | ||
| for key, value in self._options.items(): | ||
| session.sparkContext._conf.set(key, value) | ||
| return session | ||
|
|
||
| builder = Builder() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why this change?