-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-32516][SQL][FOLLOWUP] 'path' option cannot coexist with path parameter for DataFrameWriter.save(), DataStreamReader.load() and DataStreamWriter.start() #29543
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -109,6 +109,7 @@ class DefaultSource extends StreamSourceProvider with StreamSinkProvider { | |||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { | ||||||||||||||||
| import testImplicits._ | ||||||||||||||||
|
|
||||||||||||||||
| private def newMetadataDir = | ||||||||||||||||
| Utils.createTempDir(namePrefix = "streaming.metadata").getCanonicalPath | ||||||||||||||||
|
|
@@ -435,7 +436,6 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { | |||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| private def testMemorySinkCheckpointRecovery(chkLoc: String, provideInWriter: Boolean): Unit = { | ||||||||||||||||
| import testImplicits._ | ||||||||||||||||
| val ms = new MemoryStream[Int](0, sqlContext) | ||||||||||||||||
| val df = ms.toDF().toDF("a") | ||||||||||||||||
| val tableName = "test" | ||||||||||||||||
|
|
@@ -703,4 +703,53 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { | |||||||||||||||
| queries.foreach(_.stop()) | ||||||||||||||||
| } | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| test("SPARK-32516: 'path' cannot coexist with load()'s path parameter") { | ||||||||||||||||
| def verifyLoadFails(f: => DataFrame): Unit = { | ||||||||||||||||
| val e = intercept[AnalysisException](f) | ||||||||||||||||
| assert(e.getMessage.contains( | ||||||||||||||||
| "Either remove the path option, or call load() without the parameter")) | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| verifyLoadFails(spark.readStream.option("path", "tmp1").parquet("tmp2")) | ||||||||||||||||
| verifyLoadFails(spark.readStream.option("path", "tmp1").format("parquet").load("tmp2")) | ||||||||||||||||
|
|
||||||||||||||||
| withClue("SPARK-32516: legacy behavior") { | ||||||||||||||||
| withSQLConf(SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key -> "true") { | ||||||||||||||||
| spark.readStream | ||||||||||||||||
| .format("org.apache.spark.sql.streaming.test") | ||||||||||||||||
| .option("path", "tmp1") | ||||||||||||||||
| .load("tmp2") | ||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could be flaky. The directory of tmp2 could be non-empty and contains illegal data.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is using spark/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala Lines 58 to 59 in f7995c5
This datasource is being used throughout this test suite with non-existent dirs: spark/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala Lines 227 to 231 in f7995c5
Am I missing something? Thanks!
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right, I think this is not an issue here. |
||||||||||||||||
| // The legacy behavior overwrites the path option. | ||||||||||||||||
| assert(LastOptions.parameters("path") == "tmp2") | ||||||||||||||||
| } | ||||||||||||||||
| } | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| test("SPARK-32516: 'path' cannot coexist with start()'s path parameter") { | ||||||||||||||||
| val df = spark.readStream | ||||||||||||||||
| .format("org.apache.spark.sql.streaming.test") | ||||||||||||||||
| .load("tmp1") | ||||||||||||||||
|
|
||||||||||||||||
| val e = intercept[AnalysisException] { | ||||||||||||||||
| df.writeStream | ||||||||||||||||
| .format("org.apache.spark.sql.streaming.test") | ||||||||||||||||
| .option("path", "tmp2") | ||||||||||||||||
| .start("tmp3") | ||||||||||||||||
| .stop() | ||||||||||||||||
| } | ||||||||||||||||
| assert(e.getMessage.contains( | ||||||||||||||||
| "Either remove the path option, or call start() without the parameter")) | ||||||||||||||||
|
|
||||||||||||||||
| withClue("SPARK-32516: legacy behavior") { | ||||||||||||||||
| withSQLConf(SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key -> "true") { | ||||||||||||||||
| spark.readStream | ||||||||||||||||
| .format("org.apache.spark.sql.streaming.test") | ||||||||||||||||
| .option("path", "tmp4") | ||||||||||||||||
| .load("tmp5") | ||||||||||||||||
| // The legacy behavior overwrites the path option. | ||||||||||||||||
| assert(LastOptions.parameters("path") == "tmp5") | ||||||||||||||||
| } | ||||||||||||||||
| } | ||||||||||||||||
| } | ||||||||||||||||
| } | ||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
pathhere is a String, do we really need to checkpath.nonEmpty?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch. I think we need to remove the check. The output is more confusing since
pathoption is also set:I will create a PR for this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Created #29697