From 44ae34d0387f763936cddeceae64ee98b7bb279f Mon Sep 17 00:00:00 2001 From: Brandon Krieger Date: Thu, 7 Jun 2018 16:09:09 -0400 Subject: [PATCH 1/5] SPARK-24488 --- .../sql/catalyst/analysis/Analyzer.scala | 50 ++++++++++--------- .../sql/catalyst/analysis/AnalysisSuite.scala | 5 ++ 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index f9947d1fa6c7..a6ec59a7c75d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1568,11 +1568,13 @@ class Analyzer( expr.find(_.isInstanceOf[Generator]).isDefined } - private def hasNestedGenerator(expr: NamedExpression): Boolean = expr match { - case UnresolvedAlias(_: Generator, _) => false - case Alias(_: Generator, _) => false - case MultiAlias(_: Generator, _) => false - case other => hasGenerator(other) + private def hasNestedGenerator(expr: NamedExpression): Boolean = { + CleanupAliases.trimNonTopLevelAliases(expr) match { + case UnresolvedAlias(_: Generator, _) => false + case Alias(_: Generator, _) => false + case MultiAlias(_: Generator, _) => false + case other => hasGenerator(other) + } } private def trimAlias(expr: NamedExpression): Expression = expr match { @@ -1613,24 +1615,26 @@ class Analyzer( // Holds the resolved generator, if one exists in the project list. var resolvedGenerator: Generate = null - val newProjectList = projectList.flatMap { - case AliasedGenerator(generator, names, outer) if generator.childrenResolved => - // It's a sanity check, this should not happen as the previous case will throw - // exception earlier. - assert(resolvedGenerator == null, "More than one generator found in SELECT.") - - resolvedGenerator = - Generate( - generator, - unrequiredChildIndex = Nil, - outer = outer, - qualifier = None, - generatorOutput = ResolveGenerate.makeGeneratorOutput(generator, names), - child) - - resolvedGenerator.generatorOutput - case other => other :: Nil - } + val newProjectList = projectList + .map(CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + .flatMap { + case AliasedGenerator(generator, names, outer) if generator.childrenResolved => + // It's a sanity check, this should not happen as the previous case will throw + // exception earlier. + assert(resolvedGenerator == null, "More than one generator found in SELECT.") + + resolvedGenerator = + Generate( + generator, + unrequiredChildIndex = Nil, + outer = outer, + qualifier = None, + generatorOutput = ResolveGenerate.makeGeneratorOutput(generator, names), + child) + + resolvedGenerator.generatorOutput + case other => other :: Nil + } if (resolvedGenerator != null) { Project(newProjectList, resolvedGenerator) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index cd8579584ead..eff0023ec12c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -557,4 +557,9 @@ class AnalysisSuite extends AnalysisTest with Matchers { SubqueryAlias("tbl", testRelation))) assertAnalysisError(barrier, Seq("cannot resolve '`tbl.b`'")) } + + test("SPARK-24488 Generator with multiple aliases") { + assertAnalysisSuccess( + listRelation.select(Explode('list).as("first_alias").as("second_alias"))) + } } From 46c4a55ea465a0a83112a6ce730175c293bef40b Mon Sep 17 00:00:00 2001 From: Brandon Krieger Date: Fri, 8 Jun 2018 10:32:27 -0400 Subject: [PATCH 2/5] unresolved and multi too --- .../sql/catalyst/analysis/Analyzer.scala | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a6ec59a7c75d..86f4ad9e8c90 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1569,7 +1569,7 @@ class Analyzer( } private def hasNestedGenerator(expr: NamedExpression): Boolean = { - CleanupAliases.trimNonTopLevelAliases(expr) match { + trimNonTopLevelAliases(expr) match { case UnresolvedAlias(_: Generator, _) => false case Alias(_: Generator, _) => false case MultiAlias(_: Generator, _) => false @@ -1577,6 +1577,25 @@ class Analyzer( } } + def trimNonTopLevelAliases(e: Expression): Expression = e match { + case a: UnresolvedAlias => + a.copy(child = trimAliases(a.child)) + case a: Alias => + a.copy(child = trimAliases(a.child))( + exprId = a.exprId, + qualifier = a.qualifier, + explicitMetadata = Some(a.metadata)) + case a: MultiAlias => + a.copy(child = trimAliases(a.child)) + case other => trimAliases(other) + } + + private def trimAliases(e: Expression): Expression = { + e.transformDown { + case expr: NamedExpression => trimAlias(expr) + } + } + private def trimAlias(expr: NamedExpression): Expression = expr match { case UnresolvedAlias(child, _) => child case Alias(child, _) => child @@ -1616,7 +1635,7 @@ class Analyzer( var resolvedGenerator: Generate = null val newProjectList = projectList - .map(CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + .map(trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) .flatMap { case AliasedGenerator(generator, names, outer) if generator.childrenResolved => // It's a sanity check, this should not happen as the previous case will throw From f174263e4eb3f00e4870a086bb0fc69bb78f21cc Mon Sep 17 00:00:00 2001 From: Brandon Krieger Date: Fri, 8 Jun 2018 10:51:05 -0400 Subject: [PATCH 3/5] more unit test --- .../apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index eff0023ec12c..b2a1c9cac965 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -561,5 +561,11 @@ class AnalysisSuite extends AnalysisTest with Matchers { test("SPARK-24488 Generator with multiple aliases") { assertAnalysisSuccess( listRelation.select(Explode('list).as("first_alias").as("second_alias"))) + assertAnalysisSuccess( + listRelation.select(MultiAlias(MultiAlias( + PosExplode('list), Seq("first_pos", "first_val")), Seq("second_pos", "second_val")))) + assertAnalysisSuccess( + listRelation.select(UnresolvedAlias(UnresolvedAlias( + Explode('list), Some(e => "first_alias")), Some(e => "second_alias")))) } } From abd1457123595364c76f0808b6c3a8ffab59da5b Mon Sep 17 00:00:00 2001 From: Brandon Krieger Date: Mon, 11 Jun 2018 11:00:50 -0400 Subject: [PATCH 4/5] share code --- .../sql/catalyst/analysis/Analyzer.scala | 29 +++++-------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 86f4ad9e8c90..573c31bc03ac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1569,7 +1569,7 @@ class Analyzer( } private def hasNestedGenerator(expr: NamedExpression): Boolean = { - trimNonTopLevelAliases(expr) match { + CleanupAliases.trimNonTopLevelAliases(expr) match { case UnresolvedAlias(_: Generator, _) => false case Alias(_: Generator, _) => false case MultiAlias(_: Generator, _) => false @@ -1577,25 +1577,6 @@ class Analyzer( } } - def trimNonTopLevelAliases(e: Expression): Expression = e match { - case a: UnresolvedAlias => - a.copy(child = trimAliases(a.child)) - case a: Alias => - a.copy(child = trimAliases(a.child))( - exprId = a.exprId, - qualifier = a.qualifier, - explicitMetadata = Some(a.metadata)) - case a: MultiAlias => - a.copy(child = trimAliases(a.child)) - case other => trimAliases(other) - } - - private def trimAliases(e: Expression): Expression = { - e.transformDown { - case expr: NamedExpression => trimAlias(expr) - } - } - private def trimAlias(expr: NamedExpression): Expression = expr match { case UnresolvedAlias(child, _) => child case Alias(child, _) => child @@ -1635,7 +1616,7 @@ class Analyzer( var resolvedGenerator: Generate = null val newProjectList = projectList - .map(trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + .map(CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) .flatMap { case AliasedGenerator(generator, names, outer) if generator.childrenResolved => // It's a sanity check, this should not happen as the previous case will throw @@ -2360,6 +2341,8 @@ object CleanupAliases extends Rule[LogicalPlan] { private def trimAliases(e: Expression): Expression = { e.transformDown { case Alias(child, _) => child + case MultiAlias(child, _) => child + case UnresolvedAlias(child, _) => child } } @@ -2369,6 +2352,10 @@ object CleanupAliases extends Rule[LogicalPlan] { exprId = a.exprId, qualifier = a.qualifier, explicitMetadata = Some(a.metadata)) + case a: MultiAlias => + a.copy(child = trimAliases(a.child)) + case a: UnresolvedAlias => + a.copy(child = trimAliases(a.child)) case other => trimAliases(other) } From 5d5e8e56bdd2701a965fe4ec1715efe87a955acf Mon Sep 17 00:00:00 2001 From: Brandon Krieger Date: Mon, 11 Jun 2018 16:36:27 -0400 Subject: [PATCH 5/5] only handle resolved --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 --- .../org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 3 --- 2 files changed, 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 573c31bc03ac..30aa925b51dc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2342,7 +2342,6 @@ object CleanupAliases extends Rule[LogicalPlan] { e.transformDown { case Alias(child, _) => child case MultiAlias(child, _) => child - case UnresolvedAlias(child, _) => child } } @@ -2354,8 +2353,6 @@ object CleanupAliases extends Rule[LogicalPlan] { explicitMetadata = Some(a.metadata)) case a: MultiAlias => a.copy(child = trimAliases(a.child)) - case a: UnresolvedAlias => - a.copy(child = trimAliases(a.child)) case other => trimAliases(other) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index b2a1c9cac965..27d526eaf6fc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -564,8 +564,5 @@ class AnalysisSuite extends AnalysisTest with Matchers { assertAnalysisSuccess( listRelation.select(MultiAlias(MultiAlias( PosExplode('list), Seq("first_pos", "first_val")), Seq("second_pos", "second_val")))) - assertAnalysisSuccess( - listRelation.select(UnresolvedAlias(UnresolvedAlias( - Explode('list), Some(e => "first_alias")), Some(e => "second_alias")))) } }