File: Analyzer.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.{LambdaVariable, MapObj
import org.apache.spark.sql.catalyst.expressions.SubExprUtils._
import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, _}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.catalyst.trees.TreeNodeRef
import org.apache.spark.sql.catalyst.util.toPrettySQL
@@ -287,12 +287,6 @@ class Analyzer(
Seq(Seq.empty)
}

private def hasGroupingAttribute(expr: Expression): Boolean = {
expr.collectFirst {
case u: UnresolvedAttribute if resolver(u.name, VirtualColumn.hiveGroupingIdName) => u
}.isDefined
}

private[analysis] def hasGroupingFunction(e: Expression): Boolean = {
e.collectFirst {
case g: Grouping => g
@@ -446,9 +440,6 @@ class Analyzer(
// This require transformUp to replace grouping()/grouping_id() in resolved Filter/Sort
def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
case a if !a.childrenResolved => a // be sure all of the children are resolved.
case p if p.expressions.exists(hasGroupingAttribute) =>
failAnalysis(
s"${VirtualColumn.hiveGroupingIdName} is deprecated; use grouping_id() instead")

// Ensure group by expressions and aggregate expressions have been resolved.
case Aggregate(Seq(c @ Cube(groupByExprs)), aggregateExpressions, child)
@@ -1186,6 +1177,10 @@ class Analyzer(
case q: LogicalPlan =>
q transformExpressions {
case u if !u.childrenResolved => u // Skip until children are resolved.
case u: UnresolvedAttribute if resolver(u.name, VirtualColumn.hiveGroupingIdName) =>
withPosition(u) {
Alias(GroupingID(Nil), VirtualColumn.hiveGroupingIdName)()
}
case u @ UnresolvedGenerator(name, children) =>
withPosition(u) {
catalog.lookupFunction(name, children) match {
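In effect, a bare grouping__id attribute is now resolved to grouping_id() during analysis instead of being rejected. A minimal sketch of the behaviour change (assumes a SparkSession named `spark` and the Hive test table `src` exercised by the tests further down; illustration only, not the literal test code):

  // Before this patch the analyzer failed with:
  //   AnalysisException: grouping__id is deprecated; use grouping_id() instead
  // With the new resolution rule the attribute behaves like grouping_id().
  val rollup = spark.sql(
    "SELECT count(*) AS cnt, key % 5, grouping__id FROM src GROUP BY key % 5 WITH ROLLUP")
  rollup.show()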
File: group-analytics.sql
@@ -38,11 +38,11 @@ SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year)
GROUP BY CUBE(course, year);
SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year;
SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year;
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year);
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year;

-- GROUPING/GROUPING_ID in having clause
SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0;
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year;
SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0;
SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0;
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0;
@@ -54,7 +54,7 @@ SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(co
ORDER BY GROUPING(course), GROUPING(year), course, year;
SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course);
SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course);
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id;
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year;

-- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS
SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2);
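For reference, grouping__id is now expected to agree with GROUPING_ID over all grouping columns, which is what the regenerated golden output below reflects. A rough spark-shell check (assumes a SparkSession named `spark` with the courseSales test table registered; sketch only):

  val check = spark.sql(
    """SELECT course, year, grouping__id, GROUPING_ID(course, year) AS gid
      |FROM courseSales
      |GROUP BY CUBE(course, year)
      |ORDER BY grouping__id, course, year""".stripMargin)
  check.show()
  // Expectation: grouping__id equals gid on every row (0 through 3 for a two-column CUBE).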
File: group-analytics.sql.out
@@ -223,22 +223,29 @@ grouping_id() can only be used with GroupingSets/Cube/Rollup;


-- !query 16
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year)
SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
Member:
Why did you only commit group-analytics.sql.out? You should also commit the modified group-analytics.sql.

Contributor Author:
Thanks for the tips.

-- !query 16 schema
struct<>
struct<course:string,year:int,grouping__id:int>
-- !query 16 output
org.apache.spark.sql.AnalysisException
grouping__id is deprecated; use grouping_id() instead;
Java 2012 0
Java 2013 0
dotNET 2012 0
dotNET 2013 0
Java NULL 1
dotNET NULL 1
NULL 2012 2
NULL 2013 2
NULL NULL 3


-- !query 17
SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0
HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year
-- !query 17 schema
struct<course:string,year:int>
-- !query 17 output
Java NULL
NULL NULL
Java NULL
dotNET NULL


@@ -263,10 +270,13 @@ grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 20
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
-- !query 20 schema
struct<>
struct<course:string,year:int>
-- !query 20 output
org.apache.spark.sql.AnalysisException
grouping__id is deprecated; use grouping_id() instead;
Java NULL
NULL 2012
NULL 2013
NULL NULL
dotNET NULL


-- !query 21
@@ -322,12 +332,19 @@ grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;


-- !query 25
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id
SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
-- !query 25 schema
struct<>
struct<course:string,year:int>
-- !query 25 output
org.apache.spark.sql.AnalysisException
grouping__id is deprecated; use grouping_id() instead;
Java 2012
Java 2013
dotNET 2012
dotNET 2013
Java NULL
dotNET NULL
NULL 2012
NULL 2013
NULL NULL


-- !query 26
File: SQLQuerySuite.scala
@@ -1414,6 +1414,19 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for Rollup #1") {
Member:
Instead of adding a new test, you can just modify the existing one:

  test("SPARK-8976 Wrong Result for Rollup #1") {
    Seq("grouping_id()", "grouping__id").foreach { gid =>
      checkAnswer(sql(
        s"SELECT count(*) AS cnt, key % 5, $gid FROM src GROUP BY key%5 WITH ROLLUP"),
        Seq(
          (113, 3, 0),
          (91, 0, 0),
          (500, null, 1),
          (84, 1, 0),
          (105, 2, 0),
          (107, 4, 0)
        ).map(i => Row(i._1, i._2, i._3)))
    }
  }

checkAnswer(sql(
"SELECT count(*) AS cnt, key % 5, grouping__id FROM src GROUP BY key%5 WITH ROLLUP"),
Seq(
(113, 3, 0),
(91, 0, 0),
(500, null, 1),
(84, 1, 0),
(105, 2, 0),
(107, 4, 0)
).map(i => Row(i._1, i._2, i._3)))
}

test("SPARK-8976 Wrong Result for Rollup #2") {
checkAnswer(sql(
"""
@@ -1435,6 +1448,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for Rollup #2") {
checkAnswer(sql(
"""
|SELECT count(*) AS cnt, key % 5 AS k1, key-5 AS k2, grouping__id AS k3
|FROM src GROUP BY key%5, key-5
|WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin),
Seq(
(1, 0, 5, 0),
(1, 0, 15, 0),
(1, 0, 25, 0),
(1, 0, 60, 0),
(1, 0, 75, 0),
(1, 0, 80, 0),
(1, 0, 100, 0),
(1, 0, 140, 0),
(1, 0, 145, 0),
(1, 0, 150, 0)
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-8976 Wrong Result for Rollup #3") {
checkAnswer(sql(
"""
@@ -1456,6 +1490,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for Rollup #3") {
checkAnswer(sql(
"""
|SELECT count(*) AS cnt, key % 5 AS k1, key-5 AS k2, grouping__id AS k3
|FROM (SELECT key, key%2, key - 5 FROM src) t GROUP BY key%5, key-5
|WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin),
Seq(
(1, 0, 5, 0),
(1, 0, 15, 0),
(1, 0, 25, 0),
(1, 0, 60, 0),
(1, 0, 75, 0),
(1, 0, 80, 0),
(1, 0, 100, 0),
(1, 0, 140, 0),
(1, 0, 145, 0),
(1, 0, 150, 0)
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-8976 Wrong Result for CUBE #1") {
checkAnswer(sql(
"SELECT count(*) AS cnt, key % 5, grouping_id() FROM src GROUP BY key%5 WITH CUBE"),
@@ -1469,6 +1524,19 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for CUBE #1") {
checkAnswer(sql(
"SELECT count(*) AS cnt, key % 5, grouping__id FROM src GROUP BY key%5 WITH CUBE"),
Seq(
(113, 3, 0),
(91, 0, 0),
(500, null, 1),
(84, 1, 0),
(105, 2, 0),
(107, 4, 0)
).map(i => Row(i._1, i._2, i._3)))
}

test("SPARK-8976 Wrong Result for CUBE #2") {
checkAnswer(sql(
"""
@@ -1490,6 +1558,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for CUBE #2") {
checkAnswer(sql(
"""
|SELECT count(*) AS cnt, key % 5 AS k1, key-5 AS k2, grouping__id AS k3
|FROM (SELECT key, key%2, key - 5 FROM src) t GROUP BY key%5, key-5
|WITH CUBE ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin),
Seq(
(1, null, -3, 2),
(1, null, -1, 2),
(1, null, 3, 2),
(1, null, 4, 2),
(1, null, 5, 2),
(1, null, 6, 2),
(1, null, 12, 2),
(1, null, 14, 2),
(1, null, 15, 2),
(1, null, 22, 2)
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-8976 Wrong Result for GroupingSet") {
checkAnswer(sql(
"""
@@ -1511,6 +1600,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
).map(i => Row(i._1, i._2, i._3, i._4)))
}

test("SPARK-21055 replace grouping__id: Wrong Result for GroupingSet") {
checkAnswer(sql(
"""
|SELECT count(*) AS cnt, key % 5 AS k1, key-5 AS k2, grouping__id AS k3
|FROM (SELECT key, key%2, key - 5 FROM src) t GROUP BY key%5, key-5
|GROUPING SETS (key%5, key-5) ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin),
Seq(
(1, null, -3, 2),
(1, null, -1, 2),
(1, null, 3, 2),
(1, null, 4, 2),
(1, null, 5, 2),
(1, null, 6, 2),
(1, null, 12, 2),
(1, null, 14, 2),
(1, null, 15, 2),
(1, null, 22, 2)
).map(i => Row(i._1, i._2, i._3, i._4)))
}

ignore("SPARK-10562: partition by column with mixed case name") {
withTable("tbl10562") {
val df = Seq(2012 -> "a").toDF("Year", "val")