From 4b140f6d50d03c8d0358396f0c9a436ea027bd42 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 31 May 2019 13:29:55 -0700 Subject: [PATCH 1/3] SPARK-27909: Update CTE substitution to be independent. --- .../sql/catalyst/analysis/Analyzer.scala | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 91365fcbb91f..ff8105be645e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -215,23 +215,26 @@ class Analyzer( object CTESubstitution extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case With(child, relations) => - substituteCTE(child, relations.foldLeft(Seq.empty[(String, LogicalPlan)]) { - case (resolved, (name, relation)) => - resolved :+ name -> executeSameContext(substituteCTE(relation, resolved)) - }) + // substitute CTE expressions right-to-left to resolve references to previous CTEs: + // with a as (select * from t) with b as (select * from a) select * from b + relations.reverse.foldLeft(child) { + case (currentPlan, (cteName, ctePlan)) => + substituteCTE(currentPlan, cteName, ctePlan) + } case other => other } - def substituteCTE(plan: LogicalPlan, cteRelations: Seq[(String, LogicalPlan)]): LogicalPlan = { - plan resolveOperatorsDown { + def substituteCTE(plan: LogicalPlan, cteName: String, ctePlan: LogicalPlan): LogicalPlan = { + plan resolveOperatorsUp { + case UnresolvedRelation(TableIdentifier(table, None)) if resolver(cteName, table) => + ctePlan case u: UnresolvedRelation => - cteRelations.find(x => resolver(x._1, u.tableIdentifier.table)) - .map(_._2).getOrElse(u) + u case other => // This cannot be done in ResolveSubquery because ResolveSubquery does not know the CTE. other transformExpressions { case e: SubqueryExpression => - e.withNewPlan(substituteCTE(e.plan, cteRelations)) + e.withNewPlan(substituteCTE(e.plan, cteName, ctePlan)) } } } From 32317efc85245c19745a0676bab0b2e9d57d1195 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Mon, 3 Jun 2019 09:14:10 -0700 Subject: [PATCH 2/3] Use foldRight instead of reverse.foldLeft. --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ff8105be645e..5d5d95733253 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -217,8 +217,8 @@ class Analyzer( case With(child, relations) => // substitute CTE expressions right-to-left to resolve references to previous CTEs: // with a as (select * from t) with b as (select * from a) select * from b - relations.reverse.foldLeft(child) { - case (currentPlan, (cteName, ctePlan)) => + relations.foldRight(child) { + case ((cteName, ctePlan), currentPlan) => substituteCTE(currentPlan, cteName, ctePlan) } case other => other From 6ae409d3d4bfc2d617814f0b4eb923b7d84f8854 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Tue, 4 Jun 2019 09:25:11 -0700 Subject: [PATCH 3/3] Update comment. --- .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 5d5d95733253..841b85843c88 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -216,7 +216,7 @@ class Analyzer( def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case With(child, relations) => // substitute CTE expressions right-to-left to resolve references to previous CTEs: - // with a as (select * from t) with b as (select * from a) select * from b + // with a as (select * from t), b as (select * from a) select * from b relations.foldRight(child) { case ((cteName, ctePlan), currentPlan) => substituteCTE(currentPlan, cteName, ctePlan)