-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-8972][SQL]Incorrect result for rollup #7343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -193,16 +193,52 @@ class Analyzer( | |
| } | ||
|
|
||
| def apply(plan: LogicalPlan): LogicalPlan = plan transform { | ||
| case a if !a.childrenResolved => a // be sure all of the children are resolved. | ||
| case a: Cube => | ||
| GroupingSets(bitmasks(a), a.groupByExprs, a.child, a.aggregations) | ||
| case a: Rollup => | ||
| GroupingSets(bitmasks(a), a.groupByExprs, a.child, a.aggregations) | ||
| case x: GroupingSets => | ||
| val gid = AttributeReference(VirtualColumn.groupingIdName, IntegerType, false)() | ||
| // We will insert another Projection if the GROUP BY keys contains the | ||
| // non-attribute expressions. And the top operators can references those | ||
| // expressions by its alias. | ||
| // e.g. SELECT key%5 as c1 FROM src GROUP BY key%5 ==> | ||
| // SELECT a as c1 FROM (SELECT key%5 AS a FROM src) GROUP BY a | ||
|
|
||
| // find all of the non-attribute expressions in the GROUP BY keys | ||
| val nonAttributeGroupByExpressions = new ArrayBuffer[Alias]() | ||
|
|
||
| // The pair of (the original GROUP BY key, associated attribute) | ||
| val groupByExprPairs = x.groupByExprs.map(_ match { | ||
| case e: NamedExpression => (e, e.toAttribute) | ||
| case other => { | ||
| val alias = Alias(other, other.toString)() | ||
| nonAttributeGroupByExpressions += alias // add the non-attributes expression alias | ||
| (other, alias.toAttribute) | ||
| } | ||
| }) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe add
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably we want to keep the order group by expressions in the Aggregate operator, let's just keep it as it used to be. |
||
|
|
||
| // substitute the non-attribute expressions for aggregations. | ||
| val aggregation = x.aggregations.map(expr => expr.transformDown { | ||
| case e => groupByExprPairs.find(_._1.semanticEquals(e)).map(_._2).getOrElse(e) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can create a map from
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As the For example:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok thanks |
||
| }.asInstanceOf[NamedExpression]) | ||
|
|
||
| // substitute the group by expressions. | ||
| val newGroupByExprs = groupByExprPairs.map(_._2) | ||
|
|
||
| val child = if (nonAttributeGroupByExpressions.length > 0) { | ||
| // insert additional projection if contains the | ||
| // non-attribute expressions in the GROUP BY keys | ||
| Project(x.child.output ++ nonAttributeGroupByExpressions, x.child) | ||
| } else { | ||
| x.child | ||
| } | ||
|
|
||
| Aggregate( | ||
| x.groupByExprs :+ VirtualColumn.groupingIdAttribute, | ||
| x.aggregations, | ||
| Expand(x.bitmasks, x.groupByExprs, gid, x.child)) | ||
| newGroupByExprs :+ VirtualColumn.groupingIdAttribute, | ||
| aggregation, | ||
| Expand(x.bitmasks, newGroupByExprs, gid, child)) | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| 500 NULL 0 | ||
| 91 0 1 | ||
| 84 1 1 | ||
| 105 2 1 | ||
| 113 3 1 | ||
| 107 4 1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| 1 NULL -3 2 | ||
| 1 NULL -1 2 | ||
| 1 NULL 3 2 | ||
| 1 NULL 4 2 | ||
| 1 NULL 5 2 | ||
| 1 NULL 6 2 | ||
| 1 NULL 12 2 | ||
| 1 NULL 14 2 | ||
| 1 NULL 15 2 | ||
| 1 NULL 22 2 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| 1 NULL -3 2 | ||
| 1 NULL -1 2 | ||
| 1 NULL 3 2 | ||
| 1 NULL 4 2 | ||
| 1 NULL 5 2 | ||
| 1 NULL 6 2 | ||
| 1 NULL 12 2 | ||
| 1 NULL 14 2 | ||
| 1 NULL 15 2 | ||
| 1 NULL 22 2 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| 500 NULL 0 | ||
| 91 0 1 | ||
| 84 1 1 | ||
| 105 2 1 | ||
| 113 3 1 | ||
| 107 4 1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| 1 0 5 3 | ||
| 1 0 15 3 | ||
| 1 0 25 3 | ||
| 1 0 60 3 | ||
| 1 0 75 3 | ||
| 1 0 80 3 | ||
| 1 0 100 3 | ||
| 1 0 140 3 | ||
| 1 0 145 3 | ||
| 1 0 150 3 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| 1 0 5 3 | ||
| 1 0 15 3 | ||
| 1 0 25 3 | ||
| 1 0 60 3 | ||
| 1 0 75 3 | ||
| 1 0 80 3 | ||
| 1 0 100 3 | ||
| 1 0 140 3 | ||
| 1 0 145 3 | ||
| 1 0 150 3 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -85,6 +85,60 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { | |
| } | ||
| } | ||
|
|
||
| createQueryTest("SPARK-8976 Wrong Result for Rollup #1", | ||
| """ | ||
| SELECT count(*) AS cnt, key % 5,GROUPING__ID FROM src group by key%5 WITH ROLLUP | ||
| """.stripMargin) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just want to double check. These tests do fail without your fix, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, they will fail without this fixing. |
||
|
|
||
| createQueryTest("SPARK-8976 Wrong Result for Rollup #2", | ||
| """ | ||
| SELECT | ||
| count(*) AS cnt, | ||
| key % 5 as k1, | ||
| key-5 as k2, | ||
| GROUPING__ID as k3 | ||
| FROM src group by key%5, key-5 | ||
| WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10 | ||
| """.stripMargin) | ||
|
|
||
| createQueryTest("SPARK-8976 Wrong Result for Rollup #3", | ||
| """ | ||
| SELECT | ||
| count(*) AS cnt, | ||
| key % 5 as k1, | ||
| key-5 as k2, | ||
| GROUPING__ID as k3 | ||
| FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5 | ||
| WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10 | ||
| """.stripMargin) | ||
|
|
||
| createQueryTest("SPARK-8976 Wrong Result for CUBE #1", | ||
| """ | ||
| SELECT count(*) AS cnt, key % 5,GROUPING__ID FROM src group by key%5 WITH CUBE | ||
| """.stripMargin) | ||
|
|
||
| createQueryTest("SPARK-8976 Wrong Result for CUBE #2", | ||
| """ | ||
| SELECT | ||
| count(*) AS cnt, | ||
| key % 5 as k1, | ||
| key-5 as k2, | ||
| GROUPING__ID as k3 | ||
| FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5 | ||
| WITH CUBE ORDER BY cnt, k1, k2, k3 LIMIT 10 | ||
| """.stripMargin) | ||
|
|
||
| createQueryTest("SPARK-8976 Wrong Result for GroupingSet", | ||
| """ | ||
| SELECT | ||
| count(*) AS cnt, | ||
| key % 5 as k1, | ||
| key-5 as k2, | ||
| GROUPING__ID as k3 | ||
| FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5 | ||
| GROUPING SETS (key%5, key-5) ORDER BY cnt, k1, k2, k3 LIMIT 10 | ||
| """.stripMargin) | ||
|
|
||
| createQueryTest("insert table with generator with column name", | ||
| """ | ||
| | CREATE TABLE gen_tmp (key Int); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will be good to add an example in the comment to explain what we are doing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, ok, I will add more doc there, sorry for the confusing.