-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-25700][SQL] Creates ReadSupport in only Append Mode in Data Source V2 write path #22688
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
553fea1
b28afe2
fa69f9c
ded852c
2a42253
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -351,6 +351,24 @@ class DataSourceV2Suite extends QueryTest with SharedSQLContext { | |
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-25700: do not read schema when writing in other modes except append mode") { | ||
| withTempPath { file => | ||
| val cls = classOf[SimpleWriteOnlyDataSource] | ||
| val path = file.getCanonicalPath | ||
| val df = spark.range(5).select('id as 'i, -'id as 'j) | ||
| try { | ||
| df.write.format(cls.getName).option("path", path).mode("error").save() | ||
| df.write.format(cls.getName).option("path", path).mode("overwrite").save() | ||
| df.write.format(cls.getName).option("path", path).mode("ignore").save() | ||
| } catch { | ||
| case e: SchemaReadAttemptException => fail("Schema read was attempted.", e) | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To validate new code path line 250, could you add
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup |
||
| intercept[SchemaReadAttemptException] { | ||
| df.write.format(cls.getName).option("path", path).mode("append").save() | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -640,3 +658,14 @@ object SpecificReaderFactory extends PartitionReaderFactory { | |
| } | ||
| } | ||
| } | ||
|
|
||
| class SchemaReadAttemptException(m: String) extends RuntimeException(m) | ||
|
|
||
| class SimpleWriteOnlyDataSource extends SimpleWritableDataSource { | ||
| override def fullSchema(): StructType = { | ||
| // This is a bit hacky since this source implements read support but throws | ||
| // during schema retrieval. Might have to rewrite but it's done | ||
| // such so for minimised changes. | ||
| throw new SchemaReadAttemptException("read is not supported") | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,13 +43,13 @@ class SimpleWritableDataSource extends DataSourceV2 | |
| with BatchWriteSupportProvider | ||
| with SessionConfigSupport { | ||
|
|
||
| private val schema = new StructType().add("i", "long").add("j", "long") | ||
| protected def fullSchema(): StructType = new StructType().add("i", "long").add("j", "long") | ||
|
|
||
| override def keyPrefix: String = "simpleWritableDataSource" | ||
|
|
||
| class ReadSupport(path: String, conf: Configuration) extends SimpleReadSupport { | ||
|
|
||
| override def fullSchema(): StructType = schema | ||
| override def fullSchema(): StructType = SimpleWritableDataSource.this.fullSchema() | ||
|
|
||
| override def planInputPartitions(config: ScanConfig): Array[InputPartition] = { | ||
| val dataPath = new Path(path) | ||
|
|
@@ -116,7 +116,6 @@ class SimpleWritableDataSource extends DataSourceV2 | |
| schema: StructType, | ||
| mode: SaveMode, | ||
| options: DataSourceOptions): Optional[BatchWriteSupport] = { | ||
| assert(DataType.equalsStructurally(schema.asNullable, this.schema.asNullable)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For modes other than Append, I think we still need this assert, don't we?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, but it's in test code and it's just a sanity check. |
||
| assert(!SparkContext.getActive.get.conf.getBoolean("spark.speculation", false)) | ||
|
|
||
| val path = new Path(options.get("path").get()) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The write path looks like it requires two columns:
spark/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala
Line 214 in e06da95