-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-23500][SQL] Fix complex type simplification rules to apply to entire plan #20687
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,14 +44,13 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { | |
| BooleanSimplification, | ||
| SimplifyConditionals, | ||
| SimplifyBinaryComparison, | ||
| SimplifyCreateStructOps, | ||
| SimplifyCreateArrayOps, | ||
| SimplifyCreateMapOps) :: Nil | ||
| SimplifyExtractValueOps) :: Nil | ||
| } | ||
|
|
||
| val idAtt = ('id).long.notNull | ||
| val nullableIdAtt = ('nullable_id).long | ||
|
|
||
| lazy val relation = LocalRelation(idAtt ) | ||
| lazy val relation = LocalRelation(idAtt, nullableIdAtt) | ||
|
|
||
| test("explicit get from namedStruct") { | ||
| val query = relation | ||
|
|
@@ -321,7 +320,7 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { | |
| .select( | ||
| CaseWhen(Seq( | ||
| (EqualTo(2L, 'id), ('id + 1L)), | ||
| // these two are possible matches, we can't tell untill runtime | ||
| // these two are possible matches, we can't tell until runtime | ||
| (EqualTo(2L, ('id + 1L)), ('id + 2L)), | ||
| (EqualTo(2L, 'id + 2L), Literal.create(null, LongType)), | ||
| // this is a definite match (two constants), | ||
|
|
@@ -331,4 +330,47 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { | |
| .analyze | ||
| comparePlans(Optimizer execute rel, expected) | ||
| } | ||
|
|
||
| test("SPARK-23500: Simplify complex ops that aren't at the plan root") { | ||
| val structRel = relation | ||
| .select(GetStructField(CreateNamedStruct(Seq("att1", 'nullable_id)), 0, None) as "foo") | ||
| .groupBy($"foo")("1").analyze | ||
| val structExpected = relation | ||
| .select('nullable_id as "foo") | ||
| .groupBy($"foo")("1").analyze | ||
| comparePlans(Optimizer execute structRel, structExpected) | ||
|
|
||
| // These tests must use nullable attributes from the base relation for the following reason: | ||
| // in the 'original' plans below, the Aggregate node produced by groupBy() has a | ||
| // nullable AttributeReference to a1, because both array indexing and map lookup are | ||
| // nullable expressions. After optimization, the same attribute is now non-nullable, | ||
| // but the AttributeReference is not updated to reflect this. In the 'expected' plans, | ||
| // the grouping expressions have the same nullability as the original attribute in the | ||
| // relation. If that attribute is non-nullable, the tests will fail as the plans will | ||
| // compare differently, so for these tests we must use a nullable attribute. See | ||
| // SPARK-23634. | ||
| val arrayRel = relation | ||
| .select(GetArrayItem(CreateArray(Seq('nullable_id, 'nullable_id + 1L)), 0) as "a1") | ||
| .groupBy($"a1")("1").analyze | ||
| val arrayExpected = relation.select('nullable_id as "a1").groupBy($"a1")("1").analyze | ||
| comparePlans(Optimizer execute arrayRel, arrayExpected) | ||
|
|
||
| val mapRel = relation | ||
| .select(GetMapValue(CreateMap(Seq("id", 'nullable_id)), "id") as "m1") | ||
| .groupBy($"m1")("1").analyze | ||
| val mapExpected = relation | ||
| .select('nullable_id as "m1") | ||
| .groupBy($"m1")("1").analyze | ||
| comparePlans(Optimizer execute mapRel, mapExpected) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @henryr . val structRel = relation.groupBy(
CreateNamedStruct(Seq("att1", 'nullable_id)))(
GetStructField(CreateNamedStruct(Seq("att1", 'nullable_id)), 0, None)).analyze
comparePlans(Optimizer execute structRel, structRel) |
||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that the current test case become too long. For the following negative cases, let's split to another test case. Maybe, with the following title? |
||
| // Make sure that aggregation exprs are correctly ignored. Maps can't be used in grouping exprs. | ||
| val structAggRel = relation.groupBy( | ||
| CreateNamedStruct(Seq("att1", 'nullable_id)))( | ||
| GetStructField(CreateNamedStruct(Seq("att1", 'nullable_id)), 0, None)).analyze | ||
| comparePlans(Optimizer execute structAggRel, structAggRel) | ||
|
|
||
| val arrayAggRel = relation.groupBy( | ||
| CreateArray(Seq('nullable_id)))(GetArrayItem(CreateArray(Seq('nullable_id)), 0)).analyze | ||
| comparePlans(Optimizer execute arrayAggRel, arrayAggRel) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be before line 21 in alphabetical order.
You can check this locally with
dev/scalastyle.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.