Skip to content

Commit 51c33bd

Browse files
adrian-ionescugatorsmile
authored andcommitted
[SPARK-22961][REGRESSION] Constant columns should generate QueryPlanConstraints
## What changes were proposed in this pull request? #19201 introduced the following regression: given something like `df.withColumn("c", lit(2))`, we're no longer picking up `c === 2` as a constraint and infer filters from it when joins are involved, which may lead to noticeable performance degradation. This patch re-enables this optimization by picking up Aliases of Literals in Projection lists as constraints and making sure they're not treated as aliased columns. ## How was this patch tested? Unit test was added. Author: Adrian Ionescu <[email protected]> Closes #20155 from adrian-ionescu/constant_constraints.
1 parent 6cff7d1 commit 51c33bd

File tree

3 files changed

+16
-1
lines changed

3 files changed

+16
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ abstract class UnaryNode extends LogicalPlan {
247247
protected def getAliasedConstraints(projectList: Seq[NamedExpression]): Set[Expression] = {
248248
var allConstraints = child.constraints.asInstanceOf[Set[Expression]]
249249
projectList.foreach {
250+
case a @ Alias(l: Literal, _) =>
251+
allConstraints += EqualTo(a.toAttribute, l)
250252
case a @ Alias(e, _) =>
251253
// For every alias in `projectList`, replace the reference in constraints by its attribute.
252254
allConstraints ++= allConstraints.map(_ transform {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ trait QueryPlanConstraints { self: LogicalPlan =>
9898
// we may avoid producing recursive constraints.
9999
private lazy val aliasMap: AttributeMap[Expression] = AttributeMap(
100100
expressions.collect {
101-
case a: Alias => (a.toAttribute, a.child)
101+
case a: Alias if !a.child.isInstanceOf[Literal] => (a.toAttribute, a.child)
102102
} ++ children.flatMap(_.asInstanceOf[QueryPlanConstraints].aliasMap))
103103
// Note: the explicit cast is necessary, since Scala compiler fails to infer the type.
104104

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,4 +236,17 @@ class InferFiltersFromConstraintsSuite extends PlanTest {
236236
comparePlans(optimized, originalQuery)
237237
}
238238
}
239+
240+
test("constraints should be inferred from aliased literals") {
241+
val originalLeft = testRelation.subquery('left).as("left")
242+
val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a === 2).as("left")
243+
244+
val right = Project(Seq(Literal(2).as("two")), testRelation.subquery('right)).as("right")
245+
val condition = Some("left.a".attr === "right.two".attr)
246+
247+
val original = originalLeft.join(right, Inner, condition)
248+
val correct = optimizedLeft.join(right, Inner, condition)
249+
250+
comparePlans(Optimize.execute(original.analyze), correct.analyze)
251+
}
239252
}

0 commit comments

Comments
 (0)