From a85db5ea9d466f33266d95f6ae3739a6a713f68b Mon Sep 17 00:00:00 2001 From: windpiger Date: Mon, 12 Dec 2016 22:17:54 +0800 Subject: [PATCH 1/3] [SPARK-18609][SQL][WIP]Fix when CTE with Join between two table with same column name --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 7 ++++--- .../spark/sql/hive/execution/SQLQuerySuite.scala | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 37f0c8ed19d3..d546a91ca16f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -26,7 +26,7 @@ import org.apache.spark.api.java.function.FilterFunction import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CatalystConf, SimpleCatalystConf} import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} @@ -200,6 +200,7 @@ object RemoveAliasOnlyProject extends Rule[LogicalPlan] { case plan: Project if plan eq proj => plan.child case plan => plan transformExpressions { case a: Attribute if attrMap.contains(a) => attrMap(a) + case b: Alias if (attrMap.exists(_._1.exprId == b.exprId)) => b.child } } }.getOrElse(plan) @@ -416,8 +417,8 @@ object ColumnPruning extends Rule[LogicalPlan] { case w: Window if w.windowExpressions.isEmpty => w.child // Eliminate no-op Projects - case p @ Project(_, child) if sameOutput(child.output, p.output) => child - + case p @ Project(_, child) if sameOutput(child.output, p.output) => + if (child.isInstanceOf[CatalogRelation]) p else child // Can't prune the columns on LeafNode case p @ Project(_, _: LeafNode) => p diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index e607af67f93e..378662fed8bf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2011,6 +2011,22 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } + test("test CTE with join between two table with the same column name ") { + sql("DROP TABLE IF EXISTS p1") + sql("DROP TABLE IF EXISTS p2") + sql("CREATE TABLE p1 (col String)" ) + sql("CREATE TABLE p2 (col String)") + + assert( + sql( + """ + | WITH CTE AS + | (SELECT s2.col as col FROM p1 + | CROSS JOIN (SELECT e.col as col FROM p2 E) s2) + | SELECT T1.col as c1,T2.col as c2 FROM CTE T1 CROSS JOIN CTE T2 + """.stripMargin).collect.isEmpty) + } + def testCommandAvailable(command: String): Boolean = { val attempt = Try(Process(command).run(ProcessLogger(_ => ())).exitValue()) attempt.isSuccess && attempt.get == 0 From f8d602a128ad32690b519b6635cf74407d7a85d5 Mon Sep 17 00:00:00 2001 From: windpiger Date: Tue, 13 Dec 2016 08:41:49 +0800 Subject: [PATCH 2/3] fix when alias's child is not a NamedExpression --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index d546a91ca16f..27714b19d13d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -200,7 +200,8 @@ object RemoveAliasOnlyProject extends Rule[LogicalPlan] { case plan: Project if plan eq proj => plan.child case plan => plan transformExpressions { case a: Attribute if attrMap.contains(a) => attrMap(a) - case b: Alias if (attrMap.exists(_._1.exprId == b.exprId)) => b.child + case b: Alias if attrMap.exists(_._1.exprId == b.exprId) + && b.child.isInstanceOf[NamedExpression] => b.child } } }.getOrElse(plan) From 0413f9dad4ad1294e3400dc0f42f66529b1b055b Mon Sep 17 00:00:00 2001 From: windpiger Date: Wed, 14 Dec 2016 09:37:44 +0800 Subject: [PATCH 3/3] remove a relation condition in RemoveAliasOnlyProject --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 27714b19d13d..fa5f1f121818 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -418,8 +418,8 @@ object ColumnPruning extends Rule[LogicalPlan] { case w: Window if w.windowExpressions.isEmpty => w.child // Eliminate no-op Projects - case p @ Project(_, child) if sameOutput(child.output, p.output) => - if (child.isInstanceOf[CatalogRelation]) p else child + case p @ Project(_, child) if sameOutput(child.output, p.output) => child + // Can't prune the columns on LeafNode case p @ Project(_, _: LeafNode) => p