From c0ecbeea29c39091a7c1105afaa3741e28c19286 Mon Sep 17 00:00:00 2001 From: jinxing Date: Tue, 24 Oct 2017 21:18:56 +0800 Subject: [PATCH 1/2] select grouping__id from subquery --- .../sql/catalyst/analysis/Analyzer.scala | 6 ++++-- .../expressions/namedExpressions.scala | 4 +--- .../ResolveGroupingAnalyticsSuite.scala | 3 +-- .../sql/hive/execution/SQLQuerySuite.scala | 21 +++++++++++++++++++ 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index d6a962a14dc9c..acd4e1059d6c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1165,9 +1165,11 @@ class Analyzer( case q: LogicalPlan => q transformExpressions { case u if !u.childrenResolved => u // Skip until children are resolved. - case u: UnresolvedAttribute if resolver(u.name, VirtualColumn.hiveGroupingIdName) => + case u: UnresolvedAttribute + if (q.isInstanceOf[GroupingSets] || q.isInstanceOf[Aggregate]) && + resolver(u.name, VirtualColumn.groupingIdName) => withPosition(u) { - Alias(GroupingID(Nil), VirtualColumn.hiveGroupingIdName)() + Alias(GroupingID(Nil), VirtualColumn.groupingIdName)() } case u @ UnresolvedGenerator(name, children) => withPosition(u) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index e518e73cba549..73d2762a9a9d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -367,8 +367,6 @@ case class OuterReference(e: NamedExpression) } object VirtualColumn { - // The attribute name used by Hive, which has different result than Spark, deprecated. - val hiveGroupingIdName: String = "grouping__id" - val groupingIdName: String = "spark_grouping_id" + val groupingIdName: String = "grouping__id" val groupingIdAttribute: UnresolvedAttribute = UnresolvedAttribute(groupingIdName) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala index 553b1598e7750..f0ed7579e1059 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala @@ -32,8 +32,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { lazy val unresolved_a = UnresolvedAttribute("a") lazy val unresolved_b = UnresolvedAttribute("b") lazy val unresolved_c = UnresolvedAttribute("c") - lazy val gid = 'spark_grouping_id.int.withNullability(false) - lazy val hive_gid = 'grouping__id.int.withNullability(false) + lazy val gid = 'grouping__id.int.withNullability(false) lazy val grouping_a = Cast(ShiftRight(gid, 1) & 1, ByteType, Option(TimeZone.getDefault().getID)) lazy val nulInt = Literal(null, IntegerType) lazy val nulStr = Literal(null, StringType) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 1cf1c5cd5a472..35d32ba630d9e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1497,6 +1497,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } + test("select grouping__id from subquery.") { + checkAnswer( + sql( + """ + |SELECT cnt, k2, k3, grouping__id + |FROM + | (SELECT count(*) as cnt, k2, k3, grouping__id + | FROM (SELECT key, key%2 as k2 , key%3 as k3 FROM src) t1 + | GROUP BY k2, k3 + | GROUPING SETS(k2, k3)) t2 + |ORDER BY grouping__id, k2, k3 + """.stripMargin), + Seq( + (247, 0, null, 1), + (253, 1, null, 1), + (169, null, 0, 2), + (165, null, 1, 2), + (166, null, 2, 2) + ).map(i => Row(i._1, i._2, i._3, i._4))) + } + ignore("SPARK-10562: partition by column with mixed case name") { withTable("tbl10562") { val df = Seq(2012 -> "a").toDF("Year", "val") From a593442bd026ec70b4c29f2d247300f7b6d829ec Mon Sep 17 00:00:00 2001 From: jinxing Date: Wed, 8 Nov 2017 18:04:16 +0800 Subject: [PATCH 2/2] Add alias only when it's grouping analytics operator --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index acd4e1059d6c7..73710e9d939aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1161,13 +1161,19 @@ class Analyzer( * Replaces [[UnresolvedFunction]]s with concrete [[Expression]]s. */ object ResolveFunctions extends Rule[LogicalPlan] { + private def isGroupingAnalyticsOp(plan: LogicalPlan): Boolean = plan match { + case Aggregate(Seq(c @ Cube(groupByExprs)), _, _) => true + case Aggregate(Seq(r @ Rollup(groupByExprs)), _, _) => true + case x: GroupingSets => true + case _ => false + } + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { case q: LogicalPlan => q transformExpressions { case u if !u.childrenResolved => u // Skip until children are resolved. case u: UnresolvedAttribute - if (q.isInstanceOf[GroupingSets] || q.isInstanceOf[Aggregate]) && - resolver(u.name, VirtualColumn.groupingIdName) => + if resolver(u.name, VirtualColumn.groupingIdName) && isGroupingAnalyticsOp(q) => withPosition(u) { Alias(GroupingID(Nil), VirtualColumn.groupingIdName)() }