-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-29721][SQL] Prune unnecessary nested fields from Generate without Project #26978
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
86a4cfa
296293c
06d2b80
f9abd6d
fd7d9bb
19f7cd4
a9f21be
3e4218d
35b32ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -301,6 +301,38 @@ abstract class SchemaPruningSuite | |
| checkAnswer(query, Row("Y.", 1) :: Row("X.", 1) :: Row(null, 2) :: Row(null, 2) :: Nil) | ||
| } | ||
|
|
||
| testSchemaPruning("select explode of nested field of array of struct") { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the reason we did not catch the bug is that our tests are not well designed and reviewed. We have to be super careful when we review the tests; then it will be much easier to find the bugs.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for catching it and pinging me. Let me look at it.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Opened #27503 to fix it. |
||
| // Config combinations | ||
| val configs = Seq((true, true), (true, false), (false, true), (false, false)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks! |
||
|
|
||
| configs.foreach { case (nestedPruning, nestedPruningOnExpr) => | ||
| withSQLConf( | ||
| SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedPruning.toString, | ||
| SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> nestedPruningOnExpr.toString) { | ||
| val query1 = spark.table("contacts") | ||
| .select(explode(col("friends.first"))) | ||
| if (nestedPruning) { | ||
| // If `NESTED_SCHEMA_PRUNING_ENABLED` is enabled, the nested schema is | ||
| // still pruned at the scan node, even when | ||
| // `NESTED_PRUNING_ON_EXPRESSIONS` is disabled. | ||
| checkScan(query1, "struct<friends:array<struct<first:string>>>") | ||
| } else { | ||
| checkScan(query1, "struct<friends:array<struct<first:string,middle:string,last:string>>>") | ||
| } | ||
| checkAnswer(query1, Row("Susan") :: Nil) | ||
|
|
||
| val query2 = spark.table("contacts") | ||
| .select(explode(col("friends.first")), col("friends.middle")) | ||
| if (nestedPruning) { | ||
| checkScan(query2, "struct<friends:array<struct<first:string,middle:string>>>") | ||
| } else { | ||
| checkScan(query2, "struct<friends:array<struct<first:string,middle:string,last:string>>>") | ||
| } | ||
| checkAnswer(query2, Row("Susan", Array("Z.")) :: Nil) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| protected def testSchemaPruning(testName: String)(testThunk: => Unit): Unit = { | ||
| test(s"Spark vectorized reader - without partition data column - $testName") { | ||
| withSQLConf(vectorizedReaderEnabledKey -> "true") { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.