-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-17733][SQL] InferFiltersFromConstraints rule never terminates for query #15319
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
b77a4d6
ebba446
7d9e2b0
3b93209
5b25fce
e5912f8
9639c71
1558d4c
388443d
52ef1d8
909d2cd
905eaa1
45308d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,14 +74,26 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT | |
| * additional constraint of the form `b = 5` | ||
| */ | ||
| private def inferAdditionalConstraints(constraints: Set[Expression]): Set[Expression] = { | ||
| // Collect alias from expressions to avoid producing non-converging set of constraints | ||
| // for recursive functions. | ||
| // | ||
| // Don't apply transform on constraints if the attribute used to replace is an alias, | ||
| // because then both `QueryPlan.inferAdditionalConstraints` and | ||
| // `UnaryNode.getAliasedConstraints` applies and may produce a non-converging set of | ||
| // constraints. | ||
| // For more details, infer https://issues.apache.org/jira/browse/SPARK-17733 | ||
| val aliasMap = AttributeMap((expressions ++ children.flatMap(_.expressions)).collect { | ||
|
||
| case a: Alias => (a.toAttribute, a.child) | ||
| }) | ||
|
|
||
| var inferredConstraints = Set.empty[Expression] | ||
| constraints.foreach { | ||
| case eq @ EqualTo(l: Attribute, r: Attribute) => | ||
| inferredConstraints ++= (constraints - eq).map(_ transform { | ||
| case a: Attribute if a.semanticEquals(l) => r | ||
| case a: Attribute if a.semanticEquals(l) && !aliasMap.contains(r) => r | ||
| }) | ||
| inferredConstraints ++= (constraints - eq).map(_ transform { | ||
| case a: Attribute if a.semanticEquals(r) => l | ||
| case a: Attribute if a.semanticEquals(r) && !aliasMap.contains(l) => l | ||
| }) | ||
| case _ => // No inference | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,9 +27,12 @@ import org.apache.spark.sql.catalyst.rules._ | |
| class InferFiltersFromConstraintsSuite extends PlanTest { | ||
|
|
||
| object Optimize extends RuleExecutor[LogicalPlan] { | ||
| val batches = Batch("InferFilters", FixedPoint(5), InferFiltersFromConstraints) :: | ||
| Batch("PredicatePushdown", FixedPoint(5), PushPredicateThroughJoin) :: | ||
| Batch("CombineFilters", FixedPoint(5), CombineFilters) :: Nil | ||
| val batches = | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previous |
||
| Batch("InferAndPushDownFilters", FixedPoint(100), | ||
| PushPredicateThroughJoin, | ||
| PushDownPredicate, | ||
| InferFiltersFromConstraints, | ||
| CombineFilters) :: Nil | ||
| } | ||
|
|
||
| val testRelation = LocalRelation('a.int, 'b.int, 'c.int) | ||
|
|
@@ -120,4 +123,64 @@ class InferFiltersFromConstraintsSuite extends PlanTest { | |
| val optimized = Optimize.execute(originalQuery) | ||
| comparePlans(optimized, correctAnswer) | ||
| } | ||
|
|
||
| test("inner join with alias: alias contains multiple attributes") { | ||
| val t1 = testRelation.subquery('t1) | ||
| val t2 = testRelation.subquery('t2) | ||
|
|
||
| val originalQuery = t1.select('a, Coalesce(Seq('a, 'b)).as('int_col)).as("t") | ||
| .join(t2, Inner, Some("t.a".attr === "t2.a".attr && "t.int_col".attr === "t2.a".attr)) | ||
| .analyze | ||
| val currectAnswer = t1.where(IsNotNull('a) && IsNotNull(Coalesce(Seq('a, 'b))) | ||
|
||
| &&'a === Coalesce(Seq('a, 'b))) | ||
|
||
| .select('a, Coalesce(Seq('a, 'b)).as('int_col)).as("t") | ||
| .join(t2.where(IsNotNull('a)), Inner, | ||
| Some("t.a".attr === "t2.a".attr && "t.int_col".attr === "t2.a".attr)) | ||
| .analyze | ||
| val optimized = Optimize.execute(originalQuery) | ||
| comparePlans(optimized, currectAnswer) | ||
| } | ||
|
|
||
| test("inner join with alias: alias contains single attributes") { | ||
| val t1 = testRelation.subquery('t1) | ||
| val t2 = testRelation.subquery('t2) | ||
|
|
||
| val originalQuery = t1.select('a, 'b.as('d)).as("t") | ||
| .join(t2, Inner, Some("t.a".attr === "t2.a".attr && "t.d".attr === "t2.a".attr)) | ||
| .analyze | ||
| val currectAnswer = t1.where(IsNotNull('a) && IsNotNull('b) | ||
|
||
| && 'a <=> 'a && 'b <=> 'b &&'a === 'b) | ||
|
||
| .select('a, 'b.as('d)).as("t") | ||
| .join(t2.where(IsNotNull('a) && 'a <=> 'a), Inner, | ||
| Some("t.a".attr === "t2.a".attr && "t.d".attr === "t2.a".attr)) | ||
| .analyze | ||
| val optimized = Optimize.execute(originalQuery) | ||
| comparePlans(optimized, currectAnswer) | ||
| } | ||
|
|
||
| test("inner join with alias: don't generate constraints for recursive functions") { | ||
| val t1 = testRelation.subquery('t1) | ||
| val t2 = testRelation.subquery('t2) | ||
|
|
||
| val originalQuery = t1.select('a, 'b.as('d), Coalesce(Seq('a, 'b)).as('int_col)).as("t") | ||
| .join(t2, Inner, | ||
| Some("t.a".attr === "t2.a".attr | ||
| && "t.d".attr === "t2.a".attr | ||
| && "t.int_col".attr === "t2.a".attr)) | ||
| .analyze | ||
| val correctAnswer = t1.where(IsNotNull('a) && IsNotNull(Coalesce(Seq('a, 'a))) | ||
| && 'a === Coalesce(Seq('a, 'a)) && 'a <=> Coalesce(Seq('a, 'a)) && 'a <=> 'a | ||
| && 'a === 'b && IsNotNull(Coalesce(Seq('a, 'b))) && 'a === Coalesce(Seq('a, 'b)) | ||
| && IsNotNull('b) && IsNotNull(Coalesce(Seq('b, 'b))) | ||
| && 'b === Coalesce(Seq('b, 'b)) && 'b <=> Coalesce(Seq('b, 'b)) && 'b <=> 'b) | ||
| .select('a, 'b.as('d), Coalesce(Seq('a, 'b)).as('int_col)).as("t") | ||
| .join(t2.where(IsNotNull('a) && IsNotNull(Coalesce(Seq('a, 'a))) | ||
| && 'a === Coalesce(Seq('a, 'a)) && 'a <=> Coalesce(Seq('a, 'a)) && 'a <=> 'a), Inner, | ||
| Some("t.a".attr === "t2.a".attr | ||
| && "t.d".attr === "t2.a".attr | ||
| && "t.int_col".attr === "t2.a".attr)) | ||
| .analyze | ||
| val optimized = Optimize.execute(originalQuery) | ||
| comparePlans(optimized, correctAnswer) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2678,4 +2678,28 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { | |
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-17733 InferFiltersFromConstraints rule never terminates for query") { | ||
|
||
| withTempView("tmpv") { | ||
| spark.range(10).toDF("a").createTempView("tmpv") | ||
|
|
||
| // Just ensure the following query will successfully execute complete. | ||
| assert(sql( | ||
| """ | ||
| |SELECT | ||
| | * | ||
| |FROM ( | ||
| | SELECT | ||
| | COALESCE(t1.a, t2.a) AS int_col, | ||
| | t1.a, | ||
| | t2.a AS b | ||
| | FROM tmpv t1 | ||
| | CROSS JOIN tmpv t2 | ||
| |) t1 | ||
| |INNER JOIN tmpv t2 | ||
| |ON (((t2.a) = (t1.a)) AND ((t2.a) = (t1.int_col))) AND ((t2.a) = (t1.b)) | ||
| """.stripMargin).count() > 0 | ||
| ) | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
typo "infer" -> "refer" (to)?