From 2b2a5a33ed58ce07fd2515eb01e80acbedeb8b2a Mon Sep 17 00:00:00 2001 From: 10129659 Date: Fri, 20 Jul 2018 09:43:53 +0800 Subject: [PATCH 1/7] Cache can't work normally if there are case letters in SQL --- .../spark/sql/catalyst/plans/QueryPlan.scala | 8 +++-- .../expressions/CanonicalizeSuite.scala | 30 ++++++++++++++++++- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 4b4722b0b2117..b15bf6d9428a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.plans +import java.util.Locale + import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode} import org.apache.spark.sql.internal.SQLConf @@ -237,7 +239,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT // Top level `AttributeReference` may also be used for output like `Alias`, we should // normalize the epxrId too. id += 1 - ar.withExprId(ExprId(id)).canonicalized + ar.withExprId(ExprId(id)).withName(ar.name.toLowerCase(Locale.ROOT)).canonicalized case other => QueryPlan.normalizeExprId(other, allAttributes) }.withNewChildren(canonicalizedChildren) @@ -282,9 +284,9 @@ object QueryPlan extends PredicateHelper { case ar: AttributeReference => val ordinal = input.indexOf(ar.exprId) if (ordinal == -1) { - ar + ar.withName(ar.name.toLowerCase(Locale.ROOT)) } else { - ar.withExprId(ExprId(ordinal)) + ar.withExprId(ExprId(ordinal)).withName(ar.name.toLowerCase(Locale.ROOT)) } }.canonicalized.asInstanceOf[T] } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index 28e6940f3cca3..8ea8196c0be6e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.plans.logical.Range +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalRelation, Range} +import org.apache.spark.sql.types.LongType class CanonicalizeSuite extends SparkFunSuite { @@ -50,4 +52,30 @@ class CanonicalizeSuite extends SparkFunSuite { assert(range.where(arrays1).sameResult(range.where(arrays2))) assert(!range.where(arrays1).sameResult(range.where(arrays3))) } + + test("Canonicalized result is not case-insensitive") { + val u1 = 'A.string.at(0) + val u2 = 'B.string.at(1) + val u3 = 'C.string.at(2) + val caseA = CaseWhen(Seq((u1, u2)), u3) + val otherA = AttributeReference("D", LongType)(exprId = ExprId(3)) + val aliases = Alias(sum(caseA), "E")() :: Nil + val planUppercase = Aggregate( + Nil, + aliases, + LocalRelation(otherA)) + + val l1 = 'a.string.at(0) + val l2 = 'b.string.at(1) + val l3 = 'c.string.at(2) + val caseALower = CaseWhen(Seq((l1, l2)), l3) + val otherALower = AttributeReference("d", LongType)(exprId = ExprId(3)) + val aliasesLower = Alias(sum(caseALower), "e")() :: Nil + val planLowercase = Aggregate( + Nil, + aliasesLower, + LocalRelation(otherALower)) + + assert(planUppercase.canonicalized == planLowercase.canonicalized) + } } From c01cf897daea314c43c96253c0b41aace72637ac Mon Sep 17 00:00:00 2001 From: 10129659 Date: Fri, 20 Jul 2018 16:36:34 +0800 Subject: [PATCH 2/7] erase the attribute name --- .../org/apache/spark/sql/catalyst/plans/QueryPlan.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index b15bf6d9428a4..7a15ac61c2af8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -239,7 +239,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT // Top level `AttributeReference` may also be used for output like `Alias`, we should // normalize the epxrId too. id += 1 - ar.withExprId(ExprId(id)).withName(ar.name.toLowerCase(Locale.ROOT)).canonicalized + ar.withExprId(ExprId(id)).withName("").canonicalized case other => QueryPlan.normalizeExprId(other, allAttributes) }.withNewChildren(canonicalizedChildren) @@ -284,9 +284,9 @@ object QueryPlan extends PredicateHelper { case ar: AttributeReference => val ordinal = input.indexOf(ar.exprId) if (ordinal == -1) { - ar.withName(ar.name.toLowerCase(Locale.ROOT)) + ar.withName("") } else { - ar.withExprId(ExprId(ordinal)).withName(ar.name.toLowerCase(Locale.ROOT)) + ar.withExprId(ExprId(ordinal)).withName("") } }.canonicalized.asInstanceOf[T] } From 1aefcb370ad972cfc17315d000569da1f11c61ef Mon Sep 17 00:00:00 2001 From: 10129659 Date: Fri, 20 Jul 2018 16:50:51 +0800 Subject: [PATCH 3/7] erase the attribute name --- .../scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 7a15ac61c2af8..2b4a643418542 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.catalyst.plans -import java.util.Locale - import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode} import org.apache.spark.sql.internal.SQLConf From b5b2a1b5c1c2e8b04fe40c165c5827f3380a472b Mon Sep 17 00:00:00 2001 From: 10129659 Date: Fri, 20 Jul 2018 17:36:11 +0800 Subject: [PATCH 4/7] using canonicalized --- .../spark/sql/catalyst/plans/QueryPlan.scala | 6 ++-- .../expressions/CanonicalizeSuite.scala | 30 +------------------ .../spark/sql/execution/SameResultSuite.scala | 15 ++++++++++ 3 files changed, 19 insertions(+), 32 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 2b4a643418542..207146e477a12 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -237,7 +237,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT // Top level `AttributeReference` may also be used for output like `Alias`, we should // normalize the epxrId too. id += 1 - ar.withExprId(ExprId(id)).withName("").canonicalized + ar.withExprId(ExprId(id)).canonicalized case other => QueryPlan.normalizeExprId(other, allAttributes) }.withNewChildren(canonicalizedChildren) @@ -282,9 +282,9 @@ object QueryPlan extends PredicateHelper { case ar: AttributeReference => val ordinal = input.indexOf(ar.exprId) if (ordinal == -1) { - ar.withName("") + ar.canonicalized } else { - ar.withExprId(ExprId(ordinal)).withName("") + ar.withExprId(ExprId(ordinal)).canonicalized } }.canonicalized.asInstanceOf[T] } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index 8ea8196c0be6e..28e6940f3cca3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -18,10 +18,8 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalRelation, Range} -import org.apache.spark.sql.types.LongType +import org.apache.spark.sql.catalyst.plans.logical.Range class CanonicalizeSuite extends SparkFunSuite { @@ -52,30 +50,4 @@ class CanonicalizeSuite extends SparkFunSuite { assert(range.where(arrays1).sameResult(range.where(arrays2))) assert(!range.where(arrays1).sameResult(range.where(arrays3))) } - - test("Canonicalized result is not case-insensitive") { - val u1 = 'A.string.at(0) - val u2 = 'B.string.at(1) - val u3 = 'C.string.at(2) - val caseA = CaseWhen(Seq((u1, u2)), u3) - val otherA = AttributeReference("D", LongType)(exprId = ExprId(3)) - val aliases = Alias(sum(caseA), "E")() :: Nil - val planUppercase = Aggregate( - Nil, - aliases, - LocalRelation(otherA)) - - val l1 = 'a.string.at(0) - val l2 = 'b.string.at(1) - val l3 = 'c.string.at(2) - val caseALower = CaseWhen(Seq((l1, l2)), l3) - val otherALower = AttributeReference("d", LongType)(exprId = ExprId(3)) - val aliasesLower = Alias(sum(caseALower), "e")() :: Nil - val planLowercase = Aggregate( - Nil, - aliasesLower, - LocalRelation(otherALower)) - - assert(planUppercase.canonicalized == planLowercase.canonicalized) - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala index aaf51b5b90111..06d8a3b513b96 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala @@ -18,8 +18,11 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types.IntegerType /** * Tests for the sameResult function for [[SparkPlan]]s. @@ -58,4 +61,16 @@ class SameResultSuite extends QueryTest with SharedSQLContext { val df4 = spark.range(10).agg(sumDistinct($"id")) assert(df3.queryExecution.executedPlan.sameResult(df4.queryExecution.executedPlan)) } + + test("Canonicalized result is not case-insensitive") { + val a = AttributeReference("A", IntegerType)() + val b = AttributeReference("B", IntegerType)() + val planUppercase = Project(Seq(a, b), LocalRelation(a)) + + val c = AttributeReference("a", IntegerType)() + val d = AttributeReference("b", IntegerType)() + val planLowercase = Project(Seq(c, d), LocalRelation(c)) + + assert(planUppercase.sameResult(planLowercase)) + } } From 86c7ed6dd4e2790e64148ff2dc6e856b2b2fd80a Mon Sep 17 00:00:00 2001 From: 10129659 Date: Fri, 20 Jul 2018 17:43:57 +0800 Subject: [PATCH 5/7] using canonicalized --- .../scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 207146e477a12..b1ffdca091461 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -282,7 +282,7 @@ object QueryPlan extends PredicateHelper { case ar: AttributeReference => val ordinal = input.indexOf(ar.exprId) if (ordinal == -1) { - ar.canonicalized + ar } else { ar.withExprId(ExprId(ordinal)).canonicalized } From f2091a45b88b0a1bc57ec2ec9cf91a9133335827 Mon Sep 17 00:00:00 2001 From: 10129659 Date: Sat, 21 Jul 2018 10:15:21 +0800 Subject: [PATCH 6/7] project --- .../org/apache/spark/sql/execution/SameResultSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala index 06d8a3b513b96..541e62b9a3b10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala @@ -65,11 +65,11 @@ class SameResultSuite extends QueryTest with SharedSQLContext { test("Canonicalized result is not case-insensitive") { val a = AttributeReference("A", IntegerType)() val b = AttributeReference("B", IntegerType)() - val planUppercase = Project(Seq(a, b), LocalRelation(a)) + val planUppercase = Project(Seq(a), LocalRelation(a, b)) val c = AttributeReference("a", IntegerType)() val d = AttributeReference("b", IntegerType)() - val planLowercase = Project(Seq(c, d), LocalRelation(c)) + val planLowercase = Project(Seq(c), LocalRelation(c, d)) assert(planUppercase.sameResult(planLowercase)) } From f3a79636469470a620fc755374e2a90c2c39d3ba Mon Sep 17 00:00:00 2001 From: 10129659 Date: Tue, 24 Jul 2018 09:02:24 +0800 Subject: [PATCH 7/7] Canonicalized result is case-insensitive --- .../scala/org/apache/spark/sql/execution/SameResultSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala index 541e62b9a3b10..d088e24e53bfe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala @@ -62,7 +62,7 @@ class SameResultSuite extends QueryTest with SharedSQLContext { assert(df3.queryExecution.executedPlan.sameResult(df4.queryExecution.executedPlan)) } - test("Canonicalized result is not case-insensitive") { + test("Canonicalized result is case-insensitive") { val a = AttributeReference("A", IntegerType)() val b = AttributeReference("B", IntegerType)() val planUppercase = Project(Seq(a), LocalRelation(a, b))