diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 1e934d0aa0e51..dd868f3e89329 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1115,6 +1115,8 @@ class Analyzer( g.copy(join = true, child = addMissingAttr(g.child, missing)) case d: Distinct => throw new AnalysisException(s"Can't add $missingAttrs to $d") + case u: Union => /* Matched ahead of the UnaryNode case: run addMissingAttr with the same missingAttrs on every child and rebuild the Union from the results. NOTE(review): the companion change in resolveExpressionRecursively resolves against u.children.head only, so these attributes presumably originate from the first child - confirm they are also valid for the remaining children. */ + u.withNewChildren(u.children.map(addMissingAttr(_, missingAttrs))) case u: UnaryNode => u.withNewChildren(addMissingAttr(u.child, missingAttrs) :: Nil) case other => @@ -1133,6 +1135,8 @@ class Analyzer( resolved } else { plan match { + case u: Union if !u.children.head.isInstanceOf[SubqueryAlias] => /* For a Union whose first child is not a SubqueryAlias, retry resolution against that first child only; the other children are not consulted here. */ + resolveExpressionRecursively(resolved, u.children.head) case u: UnaryNode if !u.isInstanceOf[SubqueryAlias] => resolveExpressionRecursively(resolved, u.child) case other => resolved diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 13341645e8ff8..045b3cfbb14e5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2039,4 +2039,13 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { testData2.select(lit(7), 'a, 'b).orderBy(lit(1), lit(2), lit(3)), Seq(Row(7, 1, 1), Row(7, 1, 2), Row(7, 2, 1), Row(7, 2, 2), Row(7, 3, 1), Row(7, 3, 2))) } + + test("SPARK-21966: ResolveMissingReference rule should not ignore Union") { /* Covers SPARK-21966: a filter on grouping_id() is applied on top of a union of two cube aggregations, and the surviving rows are checked. */ + val df1 = Seq((1, 1), (2, 1), (2, 2)).toDF("a", "b") + val df2 = Seq((1, 1), (1, 2), (2, 3)).toDF("a", "b") + val df3 = df1.cube("a").sum("b") + val df4 = df2.cube("a").sum("b") + val df = df3.union(df4).filter("grouping_id() = 0") /* Expected rows below are the per-a sums of b from each side: df1 yields (1, 1) and (2, 3), df2 yields (1, 3) and (2, 3). */ + checkAnswer(df, 
Seq(Row(1, 1), Row(2, 3), Row(1, 3), Row(2, 3))) + } }