File: Analyzer.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.{LambdaVariable, MapObj
import org.apache.spark.sql.catalyst.expressions.SubExprUtils._
import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, _}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.catalyst.trees.TreeNodeRef
import org.apache.spark.sql.catalyst.util.toPrettySQL
@@ -287,12 +287,6 @@ class Analyzer(
Seq(Seq.empty)
}

private def hasGroupingAttribute(expr: Expression): Boolean = {
expr.collectFirst {
case u: UnresolvedAttribute if resolver(u.name, VirtualColumn.hiveGroupingIdName) => u
}.isDefined
}

private[analysis] def hasGroupingFunction(e: Expression): Boolean = {
e.collectFirst {
case g: Grouping => g
@@ -446,9 +440,6 @@ class Analyzer(
// This require transformUp to replace grouping()/grouping_id() in resolved Filter/Sort
def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
case a if !a.childrenResolved => a // be sure all of the children are resolved.
case p if p.expressions.exists(hasGroupingAttribute) =>
failAnalysis(
s"${VirtualColumn.hiveGroupingIdName} is deprecated; use grouping_id() instead")

// Ensure group by expressions and aggregate expressions have been resolved.
case Aggregate(Seq(c @ Cube(groupByExprs)), aggregateExpressions, child)
@@ -1186,6 +1177,10 @@ class Analyzer(
case q: LogicalPlan =>
q transformExpressions {
case u if !u.childrenResolved => u // Skip until children are resolved.
case u: UnresolvedAttribute if resolver(u.name, VirtualColumn.hiveGroupingIdName) =>
withPosition(u) {
Alias(GroupingID(Nil), VirtualColumn.hiveGroupingIdName)()
}
case u @ UnresolvedGenerator(name, children) =>
withPosition(u) {
catalog.lookupFunction(name, children) match {
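In effect, a bare grouping__id attribute is now resolved to grouping_id() during analysis instead of being rejected. A minimal sketch of the behaviour change (assumes a SparkSession named `spark` and the Hive test table `src` exercised by the tests further down; illustration only, not the literal test code):

  // Before this patch the analyzer failed with:
  //   AnalysisException: grouping__id is deprecated; use grouping_id() instead
  // With the new resolution rule the attribute behaves like grouping_id().
  val rollup = spark.sql(
    "SELECT count(*) AS cnt, key % 5, grouping__id FROM src GROUP BY key % 5 WITH ROLLUP")
  rollup.show()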
File: group-analytics.sql
@@ -38,11 +38,11 @@ SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year)
GROUP BY CUBE(course, year);
SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year;
SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year;
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year);
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year;

-- GROUPING/GROUPING_ID in having clause
SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0;
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year;
SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0;
SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0;
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0;
@@ -54,7 +54,7 @@ SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(co
ORDER BY GROUPING(course), GROUPING(year), course, year;
SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course);
SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course);
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id;
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year;

-- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS
SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2);
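For reference, grouping__id is now expected to agree with GROUPING_ID over all grouping columns, which is what the regenerated golden output below reflects. A rough spark-shell check (assumes a SparkSession named `spark` with the courseSales test table registered; sketch only):

  val check = spark.sql(
    """SELECT course, year, grouping__id, GROUPING_ID(course, year) AS gid
      |FROM courseSales
      |GROUP BY CUBE(course, year)
      |ORDER BY grouping__id, course, year""".stripMargin)
  check.show()
  // Expectation: grouping__id equals gid on every row (0 through 3 for a two-column CUBE).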
File: group-analytics.sql.out
@@ -223,22 +223,29 @@ grouping_id() can only be used with GroupingSets/Cube/Rollup;


-- !query 16
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year)
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
Member:
Why did you only commit group-analytics.sql.out? You should also commit the modified group-analytics.sql.

Contributor Author:
Thanks for the tips.

-- !query 16 schema
struct<>
struct<course:string,year:int,grouping__id:int>
-- !query 16 output
org.apache.spark.sql.AnalysisException
grouping__id is deprecated; use grouping_id() instead;
Java 2012 0
Java 2013 0
dotNET 2012 0
dotNET 2013 0
Java NULL 1
dotNET NULL 1
NULL 2012 2
NULL 2013 2
NULL NULL 3


-- !query 17
SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year
-- !query 17 schema
struct<course:string,year:int>
-- !query 17 output
Java NULL
NULL NULL
Java NULL
dotNET NULL


@@ -263,10 +270,13 @@ grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 20
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
-- !query 20 schema
struct<>
struct<course:string,year:int>
-- !query 20 output
org.apache.spark.sql.AnalysisException
grouping__id is deprecated; use grouping_id() instead;
Java NULL
NULL 2012
NULL 2013
NULL NULL
dotNET NULL


-- !query 21
@@ -322,12 +332,19 @@ grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;


-- !query 25
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
-- !query 25 schema
struct<>
struct<course:string,year:int>
-- !query 25 output
org.apache.spark.sql.AnalysisException
grouping__id is deprecated; use grouping_id() instead;
Java 2012
Java 2013
dotNET 2012
dotNET 2013
Java NULL
dotNET NULL
NULL 2012
NULL 2013
NULL NULL


-- !query 26
File: SQLQuerySuite.scala
@@ -1414,6 +1414,19 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for Rollup #1") {
Member:
Instead of adding a new test, you can just modify the existing one:

  test("SPARK-8976 Wrong Result for Rollup #1") {
    Seq("grouping_id()", "grouping__id").foreach { gid =>
      checkAnswer(sql(
        s"SELECT count(*) AS cnt, key % 5, $gid FROM src GROUP BY key%5 WITH ROLLUP"),
        Seq(
          (113, 3, 0),
          (91, 0, 0),
          (500, null, 1),
          (84, 1, 0),
          (105, 2, 0),
          (107, 4, 0)
        ).map(i => Row(i._1, i._2, i._3)))
    }
  }

checkAnswer(sql(
"SELECT count(*) AS cnt, key % 5, grouping__id FROM src GROUP BY key%5 WITH ROLLUP"),
Seq(
(113, 3, 0),
(91, 0, 0),
(500, null, 1),
(84, 1, 0),
(105, 2, 0),
(107, 4, 0)
).map(i => Row(i._1, i._2, i._3)))
}

test("SPARK-8976 Wrong Result for Rollup #2") {
checkAnswer(sql(
"""
@@ -1435,6 +1448,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for Rollup #2") {
checkAnswer(sql(
"""
|SELECT count(*) AS cnt, key % 5 AS k1, key-5 AS k2, grouping__id AS k3
|FROM src GROUP BY key%5, key-5
|WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin),
Seq(
(1, 0, 5, 0),
(1, 0, 15, 0),
(1, 0, 25, 0),
(1, 0, 60, 0),
(1, 0, 75, 0),
(1, 0, 80, 0),
(1, 0, 100, 0),
(1, 0, 140, 0),
(1, 0, 145, 0),
(1, 0, 150, 0)
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-8976 Wrong Result for Rollup #3") {
checkAnswer(sql(
"""
@@ -1456,6 +1490,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for Rollup #3") {
checkAnswer(sql(
"""
|SELECT count(*) AS cnt, key % 5 AS k1, key-5 AS k2, grouping__id AS k3
|FROM (SELECT key, key%2, key - 5 FROM src) t GROUP BY key%5, key-5
|WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin),
Seq(
(1, 0, 5, 0),
(1, 0, 15, 0),
(1, 0, 25, 0),
(1, 0, 60, 0),
(1, 0, 75, 0),
(1, 0, 80, 0),
(1, 0, 100, 0),
(1, 0, 140, 0),
(1, 0, 145, 0),
(1, 0, 150, 0)
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-8976 Wrong Result for CUBE #1") {
checkAnswer(sql(
"SELECT count(*) AS cnt, key % 5, grouping_id() FROM src GROUP BY key%5 WITH CUBE"),
@@ -1469,6 +1524,19 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for CUBE #1") {
checkAnswer(sql(
"SELECT count(*) AS cnt, key % 5, grouping__id FROM src GROUP BY key%5 WITH CUBE"),
Seq(
(113, 3, 0),
(91, 0, 0),
(500, null, 1),
(84, 1, 0),
(105, 2, 0),
(107, 4, 0)
).map(i => Row(i._1, i._2, i._3)))
}

test("SPARK-8976 Wrong Result for CUBE #2") {
checkAnswer(sql(
"""
@@ -1490,6 +1558,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for CUBE #2") {
checkAnswer(sql(
"""
|SELECT count(*) AS cnt, key % 5 AS k1, key-5 AS k2, grouping__id AS k3
|FROM (SELECT key, key%2, key - 5 FROM src) t GROUP BY key%5, key-5
|WITH CUBE ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin),
Seq(
(1, null, -3, 2),
(1, null, -1, 2),
(1, null, 3, 2),
(1, null, 4, 2),
(1, null, 5, 2),
(1, null, 6, 2),
(1, null, 12, 2),
(1, null, 14, 2),
(1, null, 15, 2),
(1, null, 22, 2)
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-8976 Wrong Result for GroupingSet") {
checkAnswer(sql(
"""
@@ -1511,6 +1600,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for GroupingSet") {
checkAnswer(sql(
"""
|SELECT count(*) AS cnt, key % 5 AS k1, key-5 AS k2, grouping__id AS k3
|FROM (SELECT key, key%2, key - 5 FROM src) t GROUP BY key%5, key-5
|GROUPING SETS (key%5, key-5) ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin),
Seq(
(1, null, -3, 2),
(1, null, -1, 2),
(1, null, 3, 2),
(1, null, 4, 2),
(1, null, 5, 2),
(1, null, 6, 2),
(1, null, 12, 2),
(1, null, 14, 2),
(1, null, 15, 2),
(1, null, 22, 2)
).map(i => Row(i._1, i._2, i._3, i._4)))
}

ignore("SPARK-10562: partition by column with mixed case name") {
withTable("tbl10562") {
val df = Seq(2012 -> "a").toDF("Year", "val")