From 11c5c5797e0fe6879e3434d7b1fae2687bcacd1e Mon Sep 17 00:00:00 2001 From: xdcjie Date: Tue, 29 May 2018 12:57:19 +0800 Subject: [PATCH 1/5] Add project for tranform/map/reduce sql to prune column --- .../spark/sql/catalyst/parser/AstBuilder.scala | 13 ++++++++++++- .../apache/spark/sql/hive/HiveDDLCommandSuite.scala | 9 ++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c15899cb230e..bb57582684f7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -338,6 +338,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging // Add where. val withFilter = relation.optionalMap(where)(filter) + // Add project. + val namedExpressions = expressions.map { + case e: NamedExpression => e + case e: Expression => UnresolvedAlias(e) + } + val withProject = if (namedExpressions.nonEmpty) { + Project(namedExpressions, withFilter) + } else { + withFilter + } + // Create the attributes. val (attributes, schemaLess) = if (colTypeList != null) { // Typed return columns. @@ -358,7 +369,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging expressions, string(script), attributes, - withFilter, + withProject, withScriptIOSchema( ctx, inRowFormat, recordWriter, outRowFormat, recordReader, schemaLess)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index 43ce093f8a7d..9d7e95cd3f2d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -265,11 +265,14 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle "func", Seq.empty, plans.table("e"), null) comparePlans(plan1, - p.copy(child = p.child.where('f < 10), output = Seq('key.string, 'value.string))) + p.copy(child = p.child.where('f < 10).select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), + output = Seq('key.string, 'value.string))) comparePlans(plan2, - p.copy(output = Seq('c.string, 'd.string))) + p.copy(child = p.child.select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), + output = Seq('c.string, 'd.string))) comparePlans(plan3, - p.copy(output = Seq('c.int, 'd.decimal(10, 0)))) + p.copy(child = p.child.select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), + output = Seq('c.int, 'd.decimal(10, 0)))) } test("use backticks in output of Script Transform") { From 083e0c9523aecb55ae4eb8d97a17290eff6946f5 Mon Sep 17 00:00:00 2001 From: xdcjie Date: Tue, 29 May 2018 15:41:47 +0800 Subject: [PATCH 2/5] Add project for tranform/map/reduce sql to prune column --- .../scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index bb57582684f7..cecacc691444 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -341,7 +341,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging // Add project. val namedExpressions = expressions.map { case e: NamedExpression => e - case e: Expression => UnresolvedAlias(e) + case e: _ => UnresolvedAlias(e) } val withProject = if (namedExpressions.nonEmpty) { Project(namedExpressions, withFilter) From 3997cebc06dad513fe58f472022026c00da5f38f Mon Sep 17 00:00:00 2001 From: xdcjie Date: Thu, 31 May 2018 19:36:20 +0800 Subject: [PATCH 3/5] fix unbound wildcard type compile error --- .../scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index cecacc691444..bb57582684f7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -341,7 +341,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging // Add project. val namedExpressions = expressions.map { case e: NamedExpression => e - case e: _ => UnresolvedAlias(e) + case e: Expression => UnresolvedAlias(e) } val withProject = if (namedExpressions.nonEmpty) { Project(namedExpressions, withFilter) From 958888bfc98cfca971876040194a926651c17a7c Mon Sep 17 00:00:00 2001 From: xdcjie Date: Thu, 12 Jul 2018 14:37:15 +0800 Subject: [PATCH 4/5] fix scala style --- .../scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index 9d7e95cd3f2d..bc03d4e95e8b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -265,7 +265,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle "func", Seq.empty, plans.table("e"), null) comparePlans(plan1, - p.copy(child = p.child.where('f < 10).select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), + p.copy( + child = p.child.where('f < 10).select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), output = Seq('key.string, 'value.string))) comparePlans(plan2, p.copy(child = p.child.select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), From 12b688df78a249e619eb119de1a3c55346d3d336 Mon Sep 17 00:00:00 2001 From: xdcjie Date: Mon, 16 Jul 2018 16:47:04 +0800 Subject: [PATCH 5/5] update scala style --- .../org/apache/spark/sql/hive/HiveDDLCommandSuite.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index bc03d4e95e8b..ea8bc72d618c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -269,10 +269,12 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle child = p.child.where('f < 10).select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), output = Seq('key.string, 'value.string))) comparePlans(plan2, - p.copy(child = p.child.select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), + p.copy( + child = p.child.select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), output = Seq('c.string, 'd.string))) comparePlans(plan3, - p.copy(child = p.child.select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), + p.copy( + child = p.child.select(UnresolvedAttribute("a"), UnresolvedAttribute("b")), output = Seq('c.int, 'd.decimal(10, 0)))) }