Skip to content

Commit bd12eb7

Browse files
gatorsmiledongjoon-hyun
authored andcommitted
[SPARK-25402][SQL][BACKPORT-2.2] Null handling in BooleanSimplification
## What changes were proposed in this pull request? This PR is to fix the null handling in BooleanSimplification. In the rule BooleanSimplification, there are two cases that do not properly handle null values. The optimization is not right if either side is null. This PR is to fix them. ## How was this patch tested? Added test cases Closes #22403 from gatorsmile/backportSpark25402. Authored-by: gatorsmile <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 3158fc3 commit bd12eb7

File tree

3 files changed

+64
-11
lines changed

3 files changed

+64
-11
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,10 +154,15 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
154154
case TrueLiteral Or _ => TrueLiteral
155155
case _ Or TrueLiteral => TrueLiteral
156156

157-
case a And b if Not(a).semanticEquals(b) => FalseLiteral
158-
case a Or b if Not(a).semanticEquals(b) => TrueLiteral
159-
case a And b if a.semanticEquals(Not(b)) => FalseLiteral
160-
case a Or b if a.semanticEquals(Not(b)) => TrueLiteral
157+
case a And b if Not(a).semanticEquals(b) =>
158+
If(IsNull(a), Literal.create(null, a.dataType), FalseLiteral)
159+
case a And b if a.semanticEquals(Not(b)) =>
160+
If(IsNull(b), Literal.create(null, b.dataType), FalseLiteral)
161+
162+
case a Or b if Not(a).semanticEquals(b) =>
163+
If(IsNull(a), Literal.create(null, a.dataType), TrueLiteral)
164+
case a Or b if a.semanticEquals(Not(b)) =>
165+
If(IsNull(b), Literal.create(null, b.dataType), TrueLiteral)
161166

162167
case a And b if a.semanticEquals(b) => a
163168
case a Or b if a.semanticEquals(b) => a

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
package org.apache.spark.sql.catalyst.optimizer
1919

20+
import org.apache.spark.sql.Row
2021
import org.apache.spark.sql.catalyst.analysis._
2122
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
2223
import org.apache.spark.sql.catalyst.dsl.expressions._
@@ -26,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
2627
import org.apache.spark.sql.catalyst.plans.logical._
2728
import org.apache.spark.sql.catalyst.rules._
2829
import org.apache.spark.sql.internal.SQLConf
29-
import org.apache.spark.sql.Row
30+
import org.apache.spark.sql.types.BooleanType
3031

3132
class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
3233

@@ -37,14 +38,24 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
3738
Batch("Constant Folding", FixedPoint(50),
3839
NullPropagation(conf),
3940
ConstantFolding,
41+
SimplifyConditionals,
4042
BooleanSimplification,
4143
PruneFilters(conf)) :: Nil
4244
}
4345

44-
val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string)
46+
val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string,
47+
'e.boolean, 'f.boolean, 'g.boolean, 'h.boolean)
4548

4649
val testRelationWithData = LocalRelation.fromExternalRows(
47-
testRelation.output, Seq(Row(1, 2, 3, "abc"))
50+
testRelation.output, Seq(Row(1, 2, 3, "abc", true, null, true, null))
51+
)
52+
53+
val testNotNullableRelation = LocalRelation('a.int.notNull, 'b.int.notNull, 'c.int.notNull,
54+
'd.string.notNull, 'e.boolean.notNull, 'f.boolean.notNull, 'g.boolean.notNull,
55+
'h.boolean.notNull)
56+
57+
val testNotNullableRelationWithData = LocalRelation.fromExternalRows(
58+
testNotNullableRelation.output, Seq(Row(1, 2, 3, "abc", true, false, true, false))
4859
)
4960

5061
private def checkCondition(input: Expression, expected: LogicalPlan): Unit = {
@@ -60,6 +71,13 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
6071
comparePlans(actual, correctAnswer)
6172
}
6273

74+
private def checkConditionInNotNullableRelation(
75+
input: Expression, expected: LogicalPlan): Unit = {
76+
val plan = testNotNullableRelationWithData.where(input).analyze
77+
val actual = Optimize.execute(plan)
78+
comparePlans(actual, expected)
79+
}
80+
6381
test("a && a => a") {
6482
checkCondition(Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
6583
checkCondition(Literal(1) < 'a && Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
@@ -173,10 +191,30 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
173191
}
174192

175193
test("Complementation Laws") {
176-
checkCondition('a && !'a, testRelation)
177-
checkCondition(!'a && 'a, testRelation)
194+
checkConditionInNotNullableRelation('e && !'e, testNotNullableRelation)
195+
checkConditionInNotNullableRelation(!'e && 'e, testNotNullableRelation)
196+
197+
checkConditionInNotNullableRelation('e || !'e, testNotNullableRelationWithData)
198+
checkConditionInNotNullableRelation(!'e || 'e, testNotNullableRelationWithData)
199+
}
200+
201+
test("Complementation Laws - null handling") {
202+
checkCondition('e && !'e,
203+
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)
204+
checkCondition(!'e && 'e,
205+
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)
206+
207+
checkCondition('e || !'e,
208+
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
209+
checkCondition(!'e || 'e,
210+
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
211+
}
212+
213+
test("Complementation Laws - negative case") {
214+
checkCondition('e && !'f, testRelationWithData.where('e && !'f).analyze)
215+
checkCondition(!'f && 'e, testRelationWithData.where(!'f && 'e).analyze)
178216

179-
checkCondition('a || !'a, testRelationWithData)
180-
checkCondition(!'a || 'a, testRelationWithData)
217+
checkCondition('e || !'f, testRelationWithData.where('e || !'f).analyze)
218+
checkCondition(!'f || 'e, testRelationWithData.where(!'f || 'e).analyze)
181219
}
182220
}

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1789,4 +1789,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
17891789
test("SPARK-22469: compare string with decimal") {
17901790
checkAnswer(Seq("1.5").toDF("s").filter("s > 0.5"), Row("1.5"))
17911791
}
1792+
1793+
test("SPARK-25402 Null handling in BooleanSimplification") {
1794+
val schema = StructType.fromDDL("a boolean, b int")
1795+
val rows = Seq(Row(null, 1))
1796+
1797+
val rdd = sparkContext.parallelize(rows)
1798+
val df = spark.createDataFrame(rdd, schema)
1799+
1800+
checkAnswer(df.where("(NOT a) OR a"), Seq.empty)
1801+
}
17921802
}

0 commit comments

Comments
 (0)