From 58de88c21210d469b8ef14b1f23764c31ca5651e Mon Sep 17 00:00:00 2001 From: Jia Li Date: Fri, 17 Nov 2017 17:15:01 -0800 Subject: [PATCH 01/10] [SPARK-22548][SQL] Incorrect nested AND expression pushed down to JDBC data source --- .../datasources/DataSourceStrategy.scala | 5 +++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 25 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 04d6f3f56eb0..2e5173feb8aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -497,7 +497,10 @@ object DataSourceStrategy { Some(sources.IsNotNull(a.name)) case expressions.And(left, right) => - (translateFilter(left) ++ translateFilter(right)).reduceOption(sources.And) + for { + leftFilter <- translateFilter(left) + rightFilter <- translateFilter(right) + } yield sources.And(leftFilter, rightFilter) case expressions.Or(left, right) => for { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 88a5f618d604..372aac637341 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -296,8 +296,33 @@ class JDBCSuite extends SparkFunSuite // The older versions of spark have this kind of bugs in parquet data source. val df1 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2 AND NAME != 'mary')") val df2 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2) OR NOT (NAME != 'mary')") + val df3 = sql("SELECT * FROM foobar WHERE (THEID > 0 AND NAME = 'mary') OR (NAME = 'fred')") + val df4 = sql("SELECT * FROM foobar " + + "WHERE (THEID > 0 AND TRIM(NAME) = 'mary') OR (NAME = 'fred')") + val df5 = sql("SELECT * FROM foobar " + + "WHERE THEID > 0 AND TRIM(NAME) = 'mary' AND LENGTH(NAME) > 3") + val df6 = sql("SELECT * FROM foobar " + + "WHERE THEID < 0 OR NAME = 'mary' OR NAME = 'fred'") + val df7 = sql("SELECT * FROM foobar " + + "WHERE THEID < 0 OR TRIM(NAME) = 'mary' OR NAME = 'fred'") + val df8 = sql("SELECT * FROM foobar " + + "WHERE NOT((THEID < 0 OR NAME != 'mary') AND (THEID != 1 OR NAME != 'fred'))") + val df9 = sql("SELECT * FROM foobar " + + "WHERE NOT((THEID < 0 OR NAME != 'mary') AND (THEID != 1 OR TRIM(NAME) != 'fred'))") + val df10 = sql("SELECT * FROM foobar " + + "WHERE (NOT(THEID < 0 OR TRIM(NAME) != 'mary')) OR (THEID = 1 AND NAME = 'fred')") + assert(df1.collect.toSet === Set(Row("mary", 2))) assert(df2.collect.toSet === Set(Row("mary", 2))) + assert(df3.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) + assert(df4.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) + assert(df5.collect.toSet === Set(Row("mary", 2))) + assert(df6.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) + assert(df7.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) + assert(df8.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) + assert(df9.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) + assert(df10.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) + def checkNotPushdown(df: DataFrame): DataFrame = { val parentPlan = df.queryExecution.executedPlan From e5407902c92b3936395ae1d38eba8bf20dfcbb43 Mon Sep 17 00:00:00 2001 From: Jia Li Date: Sun, 19 Nov 2017 23:20:00 -0800 Subject: [PATCH 02/10] address comment to add PR discussion number for reference --- .../spark/sql/execution/datasources/DataSourceStrategy.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 2e5173feb8aa..fd036db69b8c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -497,6 +497,7 @@ object DataSourceStrategy { Some(sources.IsNotNull(a.name)) case expressions.And(left, right) => + // See SPARK-12218 and PR 10362 for detailed discussion for { leftFilter <- translateFilter(left) rightFilter <- translateFilter(right) From 635768e65542cf66f5db92ccf5088136a55443f5 Mon Sep 17 00:00:00 2001 From: Jia Li Date: Sun, 19 Nov 2017 23:52:07 -0800 Subject: [PATCH 03/10] address comment to explain the fix with example --- .../sql/execution/datasources/DataSourceStrategy.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index fd036db69b8c..adbbe2513f83 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -498,6 +498,14 @@ object DataSourceStrategy { case expressions.And(left, right) => // See SPARK-12218 and PR 10362 for detailed discussion + // It is not safe to just convert one side if we do not understand the + // other side. Here is an example used to explain the reason. + // Let's say we have (a = 2 AND trim(b) = 'blah') OR (c > 0) + // and we do not understand how to convert trim(b) = 'blah'. + // If we only convert a = 2, we will end up with + // (a = 2) OR (c > 0), which will generate wrong results. + // Pushing one leg of AND down is only safe to do at the top level. + // You can see ParquetFilters' createFilter for more details. for { leftFilter <- translateFilter(left) rightFilter <- translateFilter(right) From 3bb7d3cc83d660e1fb2aefd9681243fb17ed18fa Mon Sep 17 00:00:00 2001 From: Jia Li Date: Mon, 20 Nov 2017 00:11:31 -0800 Subject: [PATCH 04/10] address comment to leave just JIRA number there --- .../spark/sql/execution/datasources/DataSourceStrategy.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index adbbe2513f83..400f2e03165b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -497,7 +497,7 @@ object DataSourceStrategy { Some(sources.IsNotNull(a.name)) case expressions.And(left, right) => - // See SPARK-12218 and PR 10362 for detailed discussion + // See SPARK-12218 for detailed discussion // It is not safe to just convert one side if we do not understand the // other side. Here is an example used to explain the reason. // Let's say we have (a = 2 AND trim(b) = 'blah') OR (c > 0) From 0aebdfb9e823cc6cf062a41866040b7f37d1c7a4 Mon Sep 17 00:00:00 2001 From: Jia Li Date: Mon, 20 Nov 2017 22:07:58 -0800 Subject: [PATCH 05/10] address comment to add a new DataSourceStrategySuite to test the translateFilter --- .../datasources/DataSourceStrategySuite.scala | 305 ++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala new file mode 100644 index 000000000000..e59a7b71fbd3 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.{sources, QueryTest} +import org.apache.spark.sql.catalyst.expressions +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types._ + + +class DataSourceStrategySuite extends QueryTest with SharedSQLContext { + + test("translate simple expression") { + val attrInt = AttributeReference("cint", IntegerType)() + val attrStr = AttributeReference("cstr", StringType)() + + assertResult(Some(sources.EqualTo("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.EqualTo(attrInt, Literal(1))) + } + assertResult(Some(sources.EqualTo("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.EqualTo(Literal(1), attrInt)) + } + + assertResult(Some(sources.EqualNullSafe("cstr", null))) { + DataSourceStrategy.translateFilter( + expressions.EqualNullSafe(attrStr, Literal(null))) + } + assertResult(Some(sources.EqualNullSafe("cstr", null))) { + DataSourceStrategy.translateFilter( + expressions.EqualNullSafe(Literal(null), attrStr)) + } + + assertResult(Some(sources.GreaterThan("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.GreaterThan(attrInt, Literal(1))) + } + assertResult(Some(sources.GreaterThan("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.LessThan(Literal(1), attrInt)) + } + + assertResult(Some(sources.LessThan("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.LessThan(attrInt, Literal(1))) + } + assertResult(Some(sources.LessThan("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.GreaterThan(Literal(1), attrInt)) + } + + assertResult(Some(sources.GreaterThanOrEqual("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.GreaterThanOrEqual(attrInt, Literal(1))) + } + assertResult(Some(sources.GreaterThanOrEqual("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.LessThanOrEqual(Literal(1), attrInt)) + } + + assertResult(Some(sources.LessThanOrEqual("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.LessThanOrEqual(attrInt, Literal(1))) + } + assertResult(Some(sources.LessThanOrEqual("cint", 1))) { + DataSourceStrategy.translateFilter( + expressions.GreaterThanOrEqual(Literal(1), attrInt)) + } + + assertResult(Some(sources.In("cint", Array(1, 2, 3)))) { + DataSourceStrategy.translateFilter( + expressions.InSet(attrInt, Set(1, 2, 3))) + } + + assertResult(Some(sources.In("cint", Array(1, 2, 3)))) { + DataSourceStrategy.translateFilter( + expressions.In(attrInt, Seq(Literal(1), Literal(2), Literal(3)))) + } + + assertResult(Some(sources.IsNull("cint"))) { + DataSourceStrategy.translateFilter( + expressions.IsNull(attrInt)) + } + assertResult(Some(sources.IsNotNull("cint"))) { + DataSourceStrategy.translateFilter( + expressions.IsNotNull(attrInt)) + } + + assertResult(Some(sources.And( + sources.GreaterThan("cint", 1), + sources.LessThan("cint", 10)))) { + DataSourceStrategy.translateFilter(expressions.And( + expressions.GreaterThan(attrInt, Literal(1)), + expressions.LessThan(attrInt, Literal(10)) + )) + } + + assertResult(Some(sources.Or( + sources.GreaterThanOrEqual("cint", 8), + sources.LessThanOrEqual("cint", 2)))) { + DataSourceStrategy.translateFilter(expressions.Or( + expressions.GreaterThanOrEqual(attrInt, Literal(8)), + expressions.LessThanOrEqual(attrInt, Literal(2)) + )) + } + + assertResult(Some(sources.Not( + sources.GreaterThanOrEqual("cint", 8)))) { + DataSourceStrategy.translateFilter( + expressions.Not(expressions.GreaterThanOrEqual(attrInt, Literal(8)) + )) + } + + assertResult(Some(sources.StringStartsWith("cstr", "a"))) { + DataSourceStrategy.translateFilter( + expressions.StartsWith(attrStr, Literal("a") + )) + } + + assertResult(Some(sources.StringEndsWith("cstr", "a"))) { + DataSourceStrategy.translateFilter( + expressions.EndsWith(attrStr, Literal("a") + )) + } + + assertResult(Some(sources.StringContains("cstr", "a"))) { + DataSourceStrategy.translateFilter( + expressions.Contains(attrStr, Literal("a") + )) + } + } + + test("translate complex expression") { + val attrInt = AttributeReference("cint", IntegerType)() + + assertResult(None) { + DataSourceStrategy.translateFilter( + expressions.LessThanOrEqual( + expressions.Subtract(expressions.Abs(attrInt), Literal(2)), Literal(1))) + } + + assertResult(Some(sources.Or( + sources.And( + sources.GreaterThan("cint", 1), + sources.LessThan("cint", 10)), + sources.And( + sources.GreaterThan("cint", 50), + sources.LessThan("cint", 100))))) { + DataSourceStrategy.translateFilter(expressions.Or( + expressions.And( + expressions.GreaterThan(attrInt, Literal(1)), + expressions.LessThan(attrInt, Literal(10)) + ), + expressions.And( + expressions.GreaterThan(attrInt, Literal(50)), + expressions.LessThan(attrInt, Literal(100)) + ) + )) + } + // SPARK-22548 Incorrect nested AND expression pushed down to JDBC data source + assertResult(None) { + DataSourceStrategy.translateFilter(expressions.Or( + expressions.And( + expressions.GreaterThan(attrInt, Literal(1)), + expressions.LessThan( + expressions.Abs(attrInt), + Literal(10)) + ), + expressions.And( + expressions.GreaterThan(attrInt, Literal(50)), + expressions.LessThan(attrInt, Literal(100)) + ) + )) + } + assertResult(None) { + DataSourceStrategy.translateFilter( + expressions.Not(expressions.And( + expressions.Or( + expressions.LessThanOrEqual(attrInt, Literal(1)), + expressions.GreaterThanOrEqual( + expressions.Abs(attrInt), + Literal(10)) + ), + expressions.Or( + expressions.LessThanOrEqual(attrInt, Literal(50)), + expressions.GreaterThanOrEqual(attrInt, Literal(100)) + ) + ))) + } + + assertResult(Some(sources.Or( + sources.Or( + sources.EqualTo("cint", 1), + sources.EqualTo("cint", 10)), + sources.Or( + sources.GreaterThan("cint", 0), + sources.LessThan("cint", -10))))) { + DataSourceStrategy.translateFilter(expressions.Or( + expressions.Or( + expressions.EqualTo(attrInt, Literal(1)), + expressions.EqualTo(attrInt, Literal(10)) + ), + expressions.Or( + expressions.GreaterThan(attrInt, Literal(0)), + expressions.LessThan(attrInt, Literal(-10)) + ) + )) + } + assertResult(None) { + DataSourceStrategy.translateFilter(expressions.Or( + expressions.Or( + expressions.EqualTo(attrInt, Literal(1)), + expressions.EqualTo( + expressions.Abs(attrInt), + Literal(10)) + ), + expressions.Or( + expressions.GreaterThan(attrInt, Literal(0)), + expressions.LessThan(attrInt, Literal(-10)) + ) + )) + } + + assertResult(Some(sources.And( + sources.And( + sources.GreaterThan("cint", 1), + sources.LessThan("cint", 10)), + sources.And( + sources.EqualTo("cint", 6), + sources.IsNotNull("cint"))))) { + DataSourceStrategy.translateFilter(expressions.And( + expressions.And( + expressions.GreaterThan(attrInt, Literal(1)), + expressions.LessThan(attrInt, Literal(10)) + ), + expressions.And( + expressions.EqualTo(attrInt, Literal(6)), + expressions.IsNotNull(attrInt) + ) + )) + } + assertResult(None) { + DataSourceStrategy.translateFilter(expressions.And( + expressions.And( + expressions.GreaterThan(attrInt, Literal(1)), + expressions.LessThan(attrInt, Literal(10)) + ), + expressions.And( + expressions.EqualTo(expressions.Abs(attrInt), + Literal(6)), + expressions.IsNotNull(attrInt) + ) + )) + } + + assertResult(Some(sources.And( + sources.Or( + sources.GreaterThan("cint", 1), + sources.LessThan("cint", 10)), + sources.Or( + sources.EqualTo("cint", 6), + sources.IsNotNull("cint"))))) { + DataSourceStrategy.translateFilter(expressions.And( + expressions.Or( + expressions.GreaterThan(attrInt, Literal(1)), + expressions.LessThan(attrInt, Literal(10)) + ), + expressions.Or( + expressions.EqualTo(attrInt, Literal(6)), + expressions.IsNotNull(attrInt) + ) + )) + } + assertResult(None) { + DataSourceStrategy.translateFilter(expressions.And( + expressions.Or( + expressions.GreaterThan(attrInt, Literal(1)), + expressions.LessThan(attrInt, Literal(10)) + ), + expressions.Or( + expressions.EqualTo(expressions.Abs(attrInt), + Literal(6)), + expressions.IsNotNull(attrInt) + ) + )) + } + } +} From ba061815352b617b129a46c6482b27c111cba88c Mon Sep 17 00:00:00 2001 From: Jia Li Date: Mon, 20 Nov 2017 22:58:29 -0800 Subject: [PATCH 06/10] address comment to extend QueryTest --- .../sql/execution/datasources/DataSourceStrategySuite.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index e59a7b71fbd3..4a5c79288af2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -17,14 +17,15 @@ package org.apache.spark.sql.execution.datasources -import org.apache.spark.sql.{sources, QueryTest} import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.sources import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ -class DataSourceStrategySuite extends QueryTest with SharedSQLContext { +class DataSourceStrategySuite extends PlanTest with SharedSQLContext { test("translate simple expression") { val attrInt = AttributeReference("cint", IntegerType)() From fc34568e0e99f8455bc2e3b030401330b9dc430a Mon Sep 17 00:00:00 2001 From: Jia Li Date: Mon, 20 Nov 2017 23:19:33 -0800 Subject: [PATCH 07/10] address comment to improve test suite --- .../datasources/DataSourceStrategySuite.scala | 116 +++++++++--------- 1 file changed, 56 insertions(+), 60 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index 4a5c79288af2..5cfe11f48485 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -17,27 +17,27 @@ package org.apache.spark.sql.execution.datasources +import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.sources import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types._ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { test("translate simple expression") { - val attrInt = AttributeReference("cint", IntegerType)() - val attrStr = AttributeReference("cstr", StringType)() + val attrInt = 'cint.int + val attrStr = 'cstr.string assertResult(Some(sources.EqualTo("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.EqualTo(attrInt, Literal(1))) + expressions.EqualTo(attrInt, 1)) } assertResult(Some(sources.EqualTo("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.EqualTo(Literal(1), attrInt)) + expressions.EqualTo(1, attrInt)) } assertResult(Some(sources.EqualNullSafe("cstr", null))) { @@ -51,38 +51,38 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { assertResult(Some(sources.GreaterThan("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.GreaterThan(attrInt, Literal(1))) + expressions.GreaterThan(attrInt, 1)) } assertResult(Some(sources.GreaterThan("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.LessThan(Literal(1), attrInt)) + expressions.LessThan(1, attrInt)) } assertResult(Some(sources.LessThan("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.LessThan(attrInt, Literal(1))) + expressions.LessThan(attrInt, 1)) } assertResult(Some(sources.LessThan("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.GreaterThan(Literal(1), attrInt)) + expressions.GreaterThan(1, attrInt)) } assertResult(Some(sources.GreaterThanOrEqual("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.GreaterThanOrEqual(attrInt, Literal(1))) + expressions.GreaterThanOrEqual(attrInt, 1)) } assertResult(Some(sources.GreaterThanOrEqual("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.LessThanOrEqual(Literal(1), attrInt)) + expressions.LessThanOrEqual(1, attrInt)) } assertResult(Some(sources.LessThanOrEqual("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.LessThanOrEqual(attrInt, Literal(1))) + expressions.LessThanOrEqual(attrInt, 1)) } assertResult(Some(sources.LessThanOrEqual("cint", 1))) { DataSourceStrategy.translateFilter( - expressions.GreaterThanOrEqual(Literal(1), attrInt)) + expressions.GreaterThanOrEqual(1, attrInt)) } assertResult(Some(sources.In("cint", Array(1, 2, 3)))) { @@ -92,7 +92,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { assertResult(Some(sources.In("cint", Array(1, 2, 3)))) { DataSourceStrategy.translateFilter( - expressions.In(attrInt, Seq(Literal(1), Literal(2), Literal(3)))) + expressions.In(attrInt, Seq(1, 2, 3))) } assertResult(Some(sources.IsNull("cint"))) { @@ -108,8 +108,8 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { sources.GreaterThan("cint", 1), sources.LessThan("cint", 10)))) { DataSourceStrategy.translateFilter(expressions.And( - expressions.GreaterThan(attrInt, Literal(1)), - expressions.LessThan(attrInt, Literal(10)) + expressions.GreaterThan(attrInt, 1), + expressions.LessThan(attrInt, 10) )) } @@ -117,44 +117,44 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { sources.GreaterThanOrEqual("cint", 8), sources.LessThanOrEqual("cint", 2)))) { DataSourceStrategy.translateFilter(expressions.Or( - expressions.GreaterThanOrEqual(attrInt, Literal(8)), - expressions.LessThanOrEqual(attrInt, Literal(2)) + expressions.GreaterThanOrEqual(attrInt, 8), + expressions.LessThanOrEqual(attrInt, 2) )) } assertResult(Some(sources.Not( sources.GreaterThanOrEqual("cint", 8)))) { DataSourceStrategy.translateFilter( - expressions.Not(expressions.GreaterThanOrEqual(attrInt, Literal(8)) + expressions.Not(expressions.GreaterThanOrEqual(attrInt, 8) )) } assertResult(Some(sources.StringStartsWith("cstr", "a"))) { DataSourceStrategy.translateFilter( - expressions.StartsWith(attrStr, Literal("a") + expressions.StartsWith(attrStr, "a" )) } assertResult(Some(sources.StringEndsWith("cstr", "a"))) { DataSourceStrategy.translateFilter( - expressions.EndsWith(attrStr, Literal("a") + expressions.EndsWith(attrStr, "a" )) } assertResult(Some(sources.StringContains("cstr", "a"))) { DataSourceStrategy.translateFilter( - expressions.Contains(attrStr, Literal("a") + expressions.Contains(attrStr, "a" )) } } test("translate complex expression") { - val attrInt = AttributeReference("cint", IntegerType)() + val attrInt = 'cint.int assertResult(None) { DataSourceStrategy.translateFilter( expressions.LessThanOrEqual( - expressions.Subtract(expressions.Abs(attrInt), Literal(2)), Literal(1))) + expressions.Subtract(expressions.Abs(attrInt), 2), 1)) } assertResult(Some(sources.Or( @@ -166,12 +166,12 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { sources.LessThan("cint", 100))))) { DataSourceStrategy.translateFilter(expressions.Or( expressions.And( - expressions.GreaterThan(attrInt, Literal(1)), - expressions.LessThan(attrInt, Literal(10)) + expressions.GreaterThan(attrInt, 1), + expressions.LessThan(attrInt, 10) ), expressions.And( - expressions.GreaterThan(attrInt, Literal(50)), - expressions.LessThan(attrInt, Literal(100)) + expressions.GreaterThan(attrInt, 50), + expressions.LessThan(attrInt, 100) ) )) } @@ -179,14 +179,13 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { assertResult(None) { DataSourceStrategy.translateFilter(expressions.Or( expressions.And( - expressions.GreaterThan(attrInt, Literal(1)), + expressions.GreaterThan(attrInt, 1), expressions.LessThan( - expressions.Abs(attrInt), - Literal(10)) + expressions.Abs(attrInt), 10) ), expressions.And( - expressions.GreaterThan(attrInt, Literal(50)), - expressions.LessThan(attrInt, Literal(100)) + expressions.GreaterThan(attrInt, 50), + expressions.LessThan(attrInt, 100) ) )) } @@ -194,14 +193,14 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { DataSourceStrategy.translateFilter( expressions.Not(expressions.And( expressions.Or( - expressions.LessThanOrEqual(attrInt, Literal(1)), + expressions.LessThanOrEqual(attrInt, 1), expressions.GreaterThanOrEqual( expressions.Abs(attrInt), - Literal(10)) + 10) ), expressions.Or( - expressions.LessThanOrEqual(attrInt, Literal(50)), - expressions.GreaterThanOrEqual(attrInt, Literal(100)) + expressions.LessThanOrEqual(attrInt, 50), + expressions.GreaterThanOrEqual(attrInt, 100) ) ))) } @@ -215,26 +214,25 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { sources.LessThan("cint", -10))))) { DataSourceStrategy.translateFilter(expressions.Or( expressions.Or( - expressions.EqualTo(attrInt, Literal(1)), - expressions.EqualTo(attrInt, Literal(10)) + expressions.EqualTo(attrInt, 1), + expressions.EqualTo(attrInt, 10) ), expressions.Or( - expressions.GreaterThan(attrInt, Literal(0)), - expressions.LessThan(attrInt, Literal(-10)) + expressions.GreaterThan(attrInt, 0), + expressions.LessThan(attrInt, -10) ) )) } assertResult(None) { DataSourceStrategy.translateFilter(expressions.Or( expressions.Or( - expressions.EqualTo(attrInt, Literal(1)), + expressions.EqualTo(attrInt, 1), expressions.EqualTo( - expressions.Abs(attrInt), - Literal(10)) + expressions.Abs(attrInt), 10) ), expressions.Or( - expressions.GreaterThan(attrInt, Literal(0)), - expressions.LessThan(attrInt, Literal(-10)) + expressions.GreaterThan(attrInt, 0), + expressions.LessThan(attrInt, -10) ) )) } @@ -248,11 +246,11 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { sources.IsNotNull("cint"))))) { DataSourceStrategy.translateFilter(expressions.And( expressions.And( - expressions.GreaterThan(attrInt, Literal(1)), - expressions.LessThan(attrInt, Literal(10)) + expressions.GreaterThan(attrInt, 1), + expressions.LessThan(attrInt, 10) ), expressions.And( - expressions.EqualTo(attrInt, Literal(6)), + expressions.EqualTo(attrInt, 6), expressions.IsNotNull(attrInt) ) )) @@ -260,12 +258,11 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { assertResult(None) { DataSourceStrategy.translateFilter(expressions.And( expressions.And( - expressions.GreaterThan(attrInt, Literal(1)), - expressions.LessThan(attrInt, Literal(10)) + expressions.GreaterThan(attrInt, 1), + expressions.LessThan(attrInt, 10) ), expressions.And( - expressions.EqualTo(expressions.Abs(attrInt), - Literal(6)), + expressions.EqualTo(expressions.Abs(attrInt), 6), expressions.IsNotNull(attrInt) ) )) @@ -280,11 +277,11 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { sources.IsNotNull("cint"))))) { DataSourceStrategy.translateFilter(expressions.And( expressions.Or( - expressions.GreaterThan(attrInt, Literal(1)), - expressions.LessThan(attrInt, Literal(10)) + expressions.GreaterThan(attrInt, 1), + expressions.LessThan(attrInt, 10) ), expressions.Or( - expressions.EqualTo(attrInt, Literal(6)), + expressions.EqualTo(attrInt, 6), expressions.IsNotNull(attrInt) ) )) @@ -292,12 +289,11 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { assertResult(None) { DataSourceStrategy.translateFilter(expressions.And( expressions.Or( - expressions.GreaterThan(attrInt, Literal(1)), - expressions.LessThan(attrInt, Literal(10)) + expressions.GreaterThan(attrInt, 1), + expressions.LessThan(attrInt, 10) ), expressions.Or( - expressions.EqualTo(expressions.Abs(attrInt), - Literal(6)), + expressions.EqualTo(expressions.Abs(attrInt), 6), expressions.IsNotNull(attrInt) ) )) From 0cbb528d974167bfe3740d10e15f784dcb40f2f1 Mon Sep 17 00:00:00 2001 From: Jia Li Date: Tue, 21 Nov 2017 00:28:58 -0800 Subject: [PATCH 08/10] address comments to improve tests --- .../datasources/DataSourceStrategySuite.scala | 7 ++++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 27 +++---------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index 5cfe11f48485..c8289bec6deb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -150,7 +150,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { test("translate complex expression") { val attrInt = 'cint.int - + // Functions such as 'Abs' are not supported assertResult(None) { DataSourceStrategy.translateFilter( expressions.LessThanOrEqual( @@ -176,6 +176,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { )) } // SPARK-22548 Incorrect nested AND expression pushed down to JDBC data source + // Functions such as 'Abs' are not supported assertResult(None) { DataSourceStrategy.translateFilter(expressions.Or( expressions.And( @@ -189,6 +190,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { ) )) } + // Functions such as 'Abs' are not supported assertResult(None) { DataSourceStrategy.translateFilter( expressions.Not(expressions.And( @@ -223,6 +225,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { ) )) } + // Functions such as 'Abs' are not supported assertResult(None) { DataSourceStrategy.translateFilter(expressions.Or( expressions.Or( @@ -255,6 +258,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { ) )) } + // Functions such as 'Abs' are not supported assertResult(None) { DataSourceStrategy.translateFilter(expressions.And( expressions.And( @@ -286,6 +290,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { ) )) } + // Functions such as 'Abs' are not supported assertResult(None) { DataSourceStrategy.translateFilter(expressions.And( expressions.Or( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 372aac637341..42babb2671df 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -296,33 +296,14 @@ class JDBCSuite extends SparkFunSuite // The older versions of spark have this kind of bugs in parquet data source. val df1 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2 AND NAME != 'mary')") val df2 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2) OR NOT (NAME != 'mary')") - val df3 = sql("SELECT * FROM foobar WHERE (THEID > 0 AND NAME = 'mary') OR (NAME = 'fred')") - val df4 = sql("SELECT * FROM foobar " + - "WHERE (THEID > 0 AND TRIM(NAME) = 'mary') OR (NAME = 'fred')") - val df5 = sql("SELECT * FROM foobar " + - "WHERE THEID > 0 AND TRIM(NAME) = 'mary' AND LENGTH(NAME) > 3") - val df6 = sql("SELECT * FROM foobar " + - "WHERE THEID < 0 OR NAME = 'mary' OR NAME = 'fred'") - val df7 = sql("SELECT * FROM foobar " + - "WHERE THEID < 0 OR TRIM(NAME) = 'mary' OR NAME = 'fred'") - val df8 = sql("SELECT * FROM foobar " + - "WHERE NOT((THEID < 0 OR NAME != 'mary') AND (THEID != 1 OR NAME != 'fred'))") - val df9 = sql("SELECT * FROM foobar " + - "WHERE NOT((THEID < 0 OR NAME != 'mary') AND (THEID != 1 OR TRIM(NAME) != 'fred'))") - val df10 = sql("SELECT * FROM foobar " + - "WHERE (NOT(THEID < 0 OR TRIM(NAME) != 'mary')) OR (THEID = 1 AND NAME = 'fred')") assert(df1.collect.toSet === Set(Row("mary", 2))) assert(df2.collect.toSet === Set(Row("mary", 2))) - assert(df3.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) - assert(df4.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) - assert(df5.collect.toSet === Set(Row("mary", 2))) - assert(df6.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) - assert(df7.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) - assert(df8.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) - assert(df9.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) - assert(df10.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) + // SPARK-22548: Incorrect nested AND expression pushed down to JDBC data source + val df3 = sql("SELECT * FROM foobar " + + "WHERE (THEID > 0 AND TRIM(NAME) = 'mary') OR (NAME = 'fred')") + assert(df3.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) def checkNotPushdown(df: DataFrame): DataFrame = { val parentPlan = df.queryExecution.executedPlan From a0b3d4e990cd7024b532593bca321499001fc89b Mon Sep 17 00:00:00 2001 From: Jia Li Date: Tue, 21 Nov 2017 14:33:54 -0800 Subject: [PATCH 09/10] address comments to polish test suite --- .../datasources/DataSourceStrategySuite.scala | 428 +++++++----------- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 9 +- 2 files changed, 179 insertions(+), 258 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index c8289bec6deb..c3037d8cf78e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -18,290 +18,214 @@ package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.sources import org.apache.spark.sql.test.SharedSQLContext - class DataSourceStrategySuite extends PlanTest with SharedSQLContext { test("translate simple expression") { val attrInt = 'cint.int val attrStr = 'cstr.string - assertResult(Some(sources.EqualTo("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.EqualTo(attrInt, 1)) - } - assertResult(Some(sources.EqualTo("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.EqualTo(1, attrInt)) - } + testTranslateFilter(EqualTo(attrInt, 1), Some(sources.EqualTo("cint", 1))) + testTranslateFilter(EqualTo(1, attrInt), Some(sources.EqualTo("cint", 1))) - assertResult(Some(sources.EqualNullSafe("cstr", null))) { - DataSourceStrategy.translateFilter( - expressions.EqualNullSafe(attrStr, Literal(null))) - } - assertResult(Some(sources.EqualNullSafe("cstr", null))) { - DataSourceStrategy.translateFilter( - expressions.EqualNullSafe(Literal(null), attrStr)) - } + testTranslateFilter(EqualNullSafe(attrStr, Literal(null)), + Some(sources.EqualNullSafe("cstr", null))) + testTranslateFilter(EqualNullSafe(Literal(null), attrStr), + Some(sources.EqualNullSafe("cstr", null))) - assertResult(Some(sources.GreaterThan("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.GreaterThan(attrInt, 1)) - } - assertResult(Some(sources.GreaterThan("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.LessThan(1, attrInt)) - } + testTranslateFilter(GreaterThan(attrInt, 1), Some(sources.GreaterThan("cint", 1))) + testTranslateFilter(GreaterThan(1, attrInt), Some(sources.LessThan("cint", 1))) - assertResult(Some(sources.LessThan("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.LessThan(attrInt, 1)) - } - assertResult(Some(sources.LessThan("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.GreaterThan(1, attrInt)) - } + testTranslateFilter(LessThan(attrInt, 1), Some(sources.LessThan("cint", 1))) + testTranslateFilter(LessThan(1, attrInt), Some(sources.GreaterThan("cint", 1))) - assertResult(Some(sources.GreaterThanOrEqual("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.GreaterThanOrEqual(attrInt, 1)) - } - assertResult(Some(sources.GreaterThanOrEqual("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.LessThanOrEqual(1, attrInt)) - } + testTranslateFilter(GreaterThanOrEqual(attrInt, 1), Some(sources.GreaterThanOrEqual("cint", 1))) + testTranslateFilter(GreaterThanOrEqual(1, attrInt), Some(sources.LessThanOrEqual("cint", 1))) - assertResult(Some(sources.LessThanOrEqual("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.LessThanOrEqual(attrInt, 1)) - } - assertResult(Some(sources.LessThanOrEqual("cint", 1))) { - DataSourceStrategy.translateFilter( - expressions.GreaterThanOrEqual(1, attrInt)) - } + testTranslateFilter(LessThanOrEqual(attrInt, 1), Some(sources.LessThanOrEqual("cint", 1))) + testTranslateFilter(LessThanOrEqual(1, attrInt), Some(sources.GreaterThanOrEqual("cint", 1))) - assertResult(Some(sources.In("cint", Array(1, 2, 3)))) { - DataSourceStrategy.translateFilter( - expressions.InSet(attrInt, Set(1, 2, 3))) - } + testTranslateFilter(InSet(attrInt, Set(1, 2, 3)), Some(sources.In("cint", Array(1, 2, 3)))) - assertResult(Some(sources.In("cint", Array(1, 2, 3)))) { - DataSourceStrategy.translateFilter( - expressions.In(attrInt, Seq(1, 2, 3))) - } + testTranslateFilter(In(attrInt, Seq(1, 2, 3)), Some(sources.In("cint", Array(1, 2, 3)))) - assertResult(Some(sources.IsNull("cint"))) { - DataSourceStrategy.translateFilter( - expressions.IsNull(attrInt)) - } - assertResult(Some(sources.IsNotNull("cint"))) { - DataSourceStrategy.translateFilter( - expressions.IsNotNull(attrInt)) - } + testTranslateFilter(IsNull(attrInt), Some(sources.IsNull("cint"))) + testTranslateFilter(IsNotNull(attrInt), Some(sources.IsNotNull("cint"))) - assertResult(Some(sources.And( - sources.GreaterThan("cint", 1), - sources.LessThan("cint", 10)))) { - DataSourceStrategy.translateFilter(expressions.And( - expressions.GreaterThan(attrInt, 1), - expressions.LessThan(attrInt, 10) - )) - } + // cint > 1 AND cint < 10 + testTranslateFilter(And( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10)), + Some(sources.And( + sources.GreaterThan("cint", 1), + sources.LessThan("cint", 10)))) - assertResult(Some(sources.Or( - sources.GreaterThanOrEqual("cint", 8), - sources.LessThanOrEqual("cint", 2)))) { - DataSourceStrategy.translateFilter(expressions.Or( - expressions.GreaterThanOrEqual(attrInt, 8), - expressions.LessThanOrEqual(attrInt, 2) - )) - } + // cint >= 8 OR cint <= 2 + testTranslateFilter(Or( + GreaterThanOrEqual(attrInt, 8), + LessThanOrEqual(attrInt, 2)), + Some(sources.Or( + sources.GreaterThanOrEqual("cint", 8), + sources.LessThanOrEqual("cint", 2)))) - assertResult(Some(sources.Not( - sources.GreaterThanOrEqual("cint", 8)))) { - DataSourceStrategy.translateFilter( - expressions.Not(expressions.GreaterThanOrEqual(attrInt, 8) - )) - } + testTranslateFilter(Not(GreaterThanOrEqual(attrInt, 8)), + Some(sources.Not(sources.GreaterThanOrEqual("cint", 8)))) - assertResult(Some(sources.StringStartsWith("cstr", "a"))) { - DataSourceStrategy.translateFilter( - expressions.StartsWith(attrStr, "a" - )) - } + testTranslateFilter(StartsWith(attrStr, "a"), Some(sources.StringStartsWith("cstr", "a"))) - assertResult(Some(sources.StringEndsWith("cstr", "a"))) { - DataSourceStrategy.translateFilter( - expressions.EndsWith(attrStr, "a" - )) - } + testTranslateFilter(EndsWith(attrStr, "a"), Some(sources.StringEndsWith("cstr", "a"))) - assertResult(Some(sources.StringContains("cstr", "a"))) { - DataSourceStrategy.translateFilter( - expressions.Contains(attrStr, "a" - )) - } + testTranslateFilter(Contains(attrStr, "a"), Some(sources.StringContains("cstr", "a"))) } test("translate complex expression") { val attrInt = 'cint.int - // Functions such as 'Abs' are not supported - assertResult(None) { - DataSourceStrategy.translateFilter( - expressions.LessThanOrEqual( - expressions.Subtract(expressions.Abs(attrInt), 2), 1)) - } - - assertResult(Some(sources.Or( - sources.And( - sources.GreaterThan("cint", 1), - sources.LessThan("cint", 10)), - sources.And( - sources.GreaterThan("cint", 50), - sources.LessThan("cint", 100))))) { - DataSourceStrategy.translateFilter(expressions.Or( - expressions.And( - expressions.GreaterThan(attrInt, 1), - expressions.LessThan(attrInt, 10) - ), - expressions.And( - expressions.GreaterThan(attrInt, 50), - expressions.LessThan(attrInt, 100) - ) - )) - } - // SPARK-22548 Incorrect nested AND expression pushed down to JDBC data source - // Functions such as 'Abs' are not supported - assertResult(None) { - DataSourceStrategy.translateFilter(expressions.Or( - expressions.And( - expressions.GreaterThan(attrInt, 1), - expressions.LessThan( - expressions.Abs(attrInt), 10) - ), - expressions.And( - expressions.GreaterThan(attrInt, 50), - expressions.LessThan(attrInt, 100) - ) - )) - } - // Functions such as 'Abs' are not supported - assertResult(None) { - DataSourceStrategy.translateFilter( - expressions.Not(expressions.And( - expressions.Or( - expressions.LessThanOrEqual(attrInt, 1), - expressions.GreaterThanOrEqual( - expressions.Abs(attrInt), - 10) - ), - expressions.Or( - expressions.LessThanOrEqual(attrInt, 50), - expressions.GreaterThanOrEqual(attrInt, 100) - ) - ))) - } - assertResult(Some(sources.Or( - sources.Or( - sources.EqualTo("cint", 1), - sources.EqualTo("cint", 10)), - sources.Or( - sources.GreaterThan("cint", 0), - sources.LessThan("cint", -10))))) { - DataSourceStrategy.translateFilter(expressions.Or( - expressions.Or( - expressions.EqualTo(attrInt, 1), - expressions.EqualTo(attrInt, 10) - ), - expressions.Or( - expressions.GreaterThan(attrInt, 0), - expressions.LessThan(attrInt, -10) - ) - )) - } - // Functions such as 'Abs' are not supported - assertResult(None) { - DataSourceStrategy.translateFilter(expressions.Or( - expressions.Or( - expressions.EqualTo(attrInt, 1), - expressions.EqualTo( - expressions.Abs(attrInt), 10) - ), - expressions.Or( - expressions.GreaterThan(attrInt, 0), - expressions.LessThan(attrInt, -10) - ) - )) - } + // ABS(cint) - 2 = 1 + testTranslateFilter(LessThanOrEqual( + // Expressions are not supported + // Functions such as 'Abs' are not supported + Subtract(Abs(attrInt), 2), 1), None) + + // (cin1 > 1 AND cint < 10) OR (cint > 50 AND cint > 100) + testTranslateFilter(Or( + And( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + And( + GreaterThan(attrInt, 50), + LessThan(attrInt, 100))), + Some(sources.Or( + sources.And( + sources.GreaterThan("cint", 1), + sources.LessThan("cint", 10)), + sources.And( + sources.GreaterThan("cint", 50), + sources.LessThan("cint", 100))))) - assertResult(Some(sources.And( - sources.And( - sources.GreaterThan("cint", 1), - sources.LessThan("cint", 10)), - sources.And( - sources.EqualTo("cint", 6), - sources.IsNotNull("cint"))))) { - DataSourceStrategy.translateFilter(expressions.And( - expressions.And( - expressions.GreaterThan(attrInt, 1), - expressions.LessThan(attrInt, 10) - ), - expressions.And( - expressions.EqualTo(attrInt, 6), - expressions.IsNotNull(attrInt) - ) - )) - } - // Functions such as 'Abs' are not supported - assertResult(None) { - DataSourceStrategy.translateFilter(expressions.And( - expressions.And( - expressions.GreaterThan(attrInt, 1), - expressions.LessThan(attrInt, 10) - ), - expressions.And( - expressions.EqualTo(expressions.Abs(attrInt), 6), - expressions.IsNotNull(attrInt) - ) - )) - } + // SPARK-22548 Incorrect nested AND expression pushed down to JDBC data source + // (cint > 1 AND ABS(cint) < 10) OR (cint < 50 AND cint > 100) + testTranslateFilter(Or( + And( + GreaterThan(attrInt, 1), + // Functions such as 'Abs' are not supported + LessThan(Abs(attrInt), 10) + ), + And( + GreaterThan(attrInt, 50), + LessThan(attrInt, 100))), None) + + // NOT ((cint <= 1 OR ABS(cint) >= 10) AND (cint <= 50 OR cint >= 100)) + testTranslateFilter(Not(And( + Or( + LessThanOrEqual(attrInt, 1), + // Functions such as 'Abs' are not supported + GreaterThanOrEqual(Abs(attrInt), 10) + ), + Or( + LessThanOrEqual(attrInt, 50), + GreaterThanOrEqual(attrInt, 100)))), None) + + // (cint = 1 OR cint = 10) OR (cint > 0 OR cint < -10) + testTranslateFilter(Or( + Or( + EqualTo(attrInt, 1), + EqualTo(attrInt, 10) + ), + Or( + GreaterThan(attrInt, 0), + LessThan(attrInt, -10))), + Some(sources.Or( + sources.Or( + sources.EqualTo("cint", 1), + sources.EqualTo("cint", 10)), + sources.Or( + sources.GreaterThan("cint", 0), + sources.LessThan("cint", -10))))) + + // (cint = 1 OR ABS(cint) = 10) OR (cint > 0 OR cint < -10) + testTranslateFilter(Or( + Or( + EqualTo(attrInt, 1), + // Functions such as 'Abs' are not supported + EqualTo(Abs(attrInt), 10) + ), + Or( + GreaterThan(attrInt, 0), + LessThan(attrInt, -10))), None) + + // In end-to-end testing, conjunctive predicate should has been split + // before reaching DataSourceStrategy.translateFilter. + // This is for UT purpose to test each [[case]]. + // (cint > 1 AND cint < 10) AND (cint = 6 AND cint IS NOT NULL) + testTranslateFilter(And( + And( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + And( + EqualTo(attrInt, 6), + IsNotNull(attrInt))), + Some(sources.And( + sources.And( + sources.GreaterThan("cint", 1), + sources.LessThan("cint", 10)), + sources.And( + sources.EqualTo("cint", 6), + sources.IsNotNull("cint"))))) + + // (cint > 1 AND cint < 10) AND (ABS(cint) = 6 AND cint IS NOT NULL) + testTranslateFilter(And( + And( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + And( + // Functions such as 'Abs' are not supported + EqualTo(Abs(attrInt), 6), + IsNotNull(attrInt))), None) + + // (cint > 1 OR cint < 10) AND (cint = 6 OR cint IS NOT NULL) + testTranslateFilter(And( + Or( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + Or( + EqualTo(attrInt, 6), + IsNotNull(attrInt))), + Some(sources.And( + sources.Or( + sources.GreaterThan("cint", 1), + sources.LessThan("cint", 10)), + sources.Or( + sources.EqualTo("cint", 6), + sources.IsNotNull("cint"))))) + + // (cint > 1 OR cint < 10) AND (cint = 6 OR cint IS NOT NULL) + testTranslateFilter(And( + Or( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + Or( + // Functions such as 'Abs' are not supported + EqualTo(Abs(attrInt), 6), + IsNotNull(attrInt))), None) + } - assertResult(Some(sources.And( - sources.Or( - sources.GreaterThan("cint", 1), - sources.LessThan("cint", 10)), - sources.Or( - sources.EqualTo("cint", 6), - sources.IsNotNull("cint"))))) { - DataSourceStrategy.translateFilter(expressions.And( - expressions.Or( - expressions.GreaterThan(attrInt, 1), - expressions.LessThan(attrInt, 10) - ), - expressions.Or( - expressions.EqualTo(attrInt, 6), - expressions.IsNotNull(attrInt) - ) - )) - } - // Functions such as 'Abs' are not supported - assertResult(None) { - DataSourceStrategy.translateFilter(expressions.And( - expressions.Or( - expressions.GreaterThan(attrInt, 1), - expressions.LessThan(attrInt, 10) - ), - expressions.Or( - expressions.EqualTo(expressions.Abs(attrInt), 6), - expressions.IsNotNull(attrInt) - ) - )) + /** + * Translate the given Catalyst [[Expression]] into data source [[sources.Filter]] + * then verify against the given [[sources.Filter]]. + */ + def testTranslateFilter(catalystFilter: Expression, result: Option[sources.Filter]): Unit = { + assertResult(result) { + DataSourceStrategy.translateFilter(catalystFilter) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 42babb2671df..61571bccdcb5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -294,16 +294,13 @@ class JDBCSuite extends SparkFunSuite // This is a test to reflect discussion in SPARK-12218. // The older versions of spark have this kind of bugs in parquet data source. - val df1 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2 AND NAME != 'mary')") - val df2 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2) OR NOT (NAME != 'mary')") - + val df1 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2) OR NOT (NAME != 'mary')") assert(df1.collect.toSet === Set(Row("mary", 2))) - assert(df2.collect.toSet === Set(Row("mary", 2))) // SPARK-22548: Incorrect nested AND expression pushed down to JDBC data source - val df3 = sql("SELECT * FROM foobar " + + val df2 = sql("SELECT * FROM foobar " + "WHERE (THEID > 0 AND TRIM(NAME) = 'mary') OR (NAME = 'fred')") - assert(df3.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) + assert(df2.collect.toSet === Set(Row("fred", 1), Row("mary", 2))) def checkNotPushdown(df: DataFrame): DataFrame = { val parentPlan = df.queryExecution.executedPlan From 7a19ac63fcdae6b67ff989ca90d4a3652c7d02f3 Mon Sep 17 00:00:00 2001 From: Jia Li Date: Tue, 21 Nov 2017 15:17:15 -0800 Subject: [PATCH 10/10] address comments to fix a typo in test case --- .../sql/execution/datasources/DataSourceStrategySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index c3037d8cf78e..f20aded169e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -85,7 +85,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSQLContext { test("translate complex expression") { val attrInt = 'cint.int - // ABS(cint) - 2 = 1 + // ABS(cint) - 2 <= 1 testTranslateFilter(LessThanOrEqual( // Expressions are not supported // Functions such as 'Abs' are not supported