-
Notifications
You must be signed in to change notification settings - Fork 29.3k
[SPARK-20334][SQL] Return a better error message when correlated predicates contain aggregate expression that has mixture of outer and local references. #17636
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
a266c8d
bb1bdad
ff88651
c4e1a01
af3d367
55c64ca
d986ddc
2411f3e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1210,6 +1210,29 @@ class Analyzer( | |
| private def checkAndGetOuterReferences(sub: LogicalPlan): Seq[Expression] = { | ||
| val outerReferences = ArrayBuffer.empty[Expression] | ||
|
|
||
| // Validate that correlated aggregate expressions do not contain a mixture | ||
| // of outer and local references. | ||
| def checkMixedReferencesInsideAggregation(expr: Expression): Unit = { | ||
| expr.foreach { | ||
| case a: AggregateExpression if containsOuter(a) => | ||
| val outer = a.collect { case OuterReference(e) => e.toAttribute } | ||
| val local = a.references -- outer | ||
| if (local.nonEmpty) { | ||
| val msg = | ||
| s""" | ||
| |Found an aggregate expression in a correlated predicate that has both | ||
| |outer and local references, which is not supported yet. | ||
| |Aggregate expression: ${a.sql} | ||
| |Outer references: ${outer.map(_.sql).mkString(", ")} | ||
| |Local references: ${local.map(_.sql).mkString(", ")} | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add a comma at each line |
||
| """. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. style
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gatorsmile Thanks. fixed. |
||
| stripMargin.replace("\n", " ").trim() | ||
| failAnalysis(msg) | ||
| } | ||
| case _ => | ||
| } | ||
| } | ||
|
|
||
| // Make sure a plan's subtree does not contain outer references | ||
| def failOnOuterReferenceInSubTree(p: LogicalPlan): Unit = { | ||
| if (hasOuterReferences(p)) { | ||
|
|
@@ -1219,6 +1242,7 @@ class Analyzer( | |
|
|
||
| // Make sure a plan's expressions do not contain outer references | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should change this comment accordingly too.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya Ok. will do. |
||
| def failOnOuterReference(p: LogicalPlan): Unit = { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. failOnOuterReference -> failOnOuterOrMixedReference? Or is it fine to keep it unchanged?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya ok, will change it to failOnInvalidOuterReference. That should include both the cases. |
||
| p.expressions.foreach(checkMixedReferencesInsideAggregation) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya checkMixedReferencesInsideAggregation only looks for AggregateExpression? For other types it's just a pass-through.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok. got it. :-) Btw, |
||
| if (p.expressions.exists(containsOuter)) { | ||
| failAnalysis( | ||
| "Expressions referencing the outer query are not supported outside of WHERE/HAVING " + | ||
|
|
@@ -1305,6 +1329,8 @@ class Analyzer( | |
| case _: EqualTo | _: EqualNullSafe => false | ||
| case _ => true | ||
| } | ||
|
|
||
| correlated.foreach(checkMixedReferencesInsideAggregation(_)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gatorsmile Thanks. fixed. |
||
| // The aggregate expressions are treated in a special way by getOuterReferences. If the | ||
| // aggregate expression contains only outer reference attributes then the entire aggregate | ||
| // expression is isolated as an OuterReference. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -367,6 +367,8 @@ case class OuterReference(e: NamedExpression) | |
| override def exprId: ExprId = e.exprId | ||
| override def toAttribute: Attribute = e.toAttribute | ||
| override def newInstance(): NamedExpression = OuterReference(e.newInstance()) | ||
| override def sql: String = e.sql | ||
| override def toString: String = e.toString | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason to override this? If we keep it unchanged, will it be easier for us to find whether it is an outer reference in the plans?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gatorsmile That's a good point, Sean. I will revert this and adjust the error-raising code to make sure it's printed properly. |
||
| } | ||
|
|
||
| object VirtualColumn { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,42 +1,72 @@ | ||
| -- The test file contains negative test cases | ||
| -- of invalid queries where error messages are expected. | ||
|
|
||
| create temporary view t1 as select * from values | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Those just change for case, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya Yeah, since I was on this test case, I thought I should fix the case. |
||
| CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES | ||
| (1, 2, 3) | ||
| as t1(t1a, t1b, t1c); | ||
| AS t1(t1a, t1b, t1c); | ||
|
|
||
| create temporary view t2 as select * from values | ||
| CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES | ||
| (1, 0, 1) | ||
| as t2(t2a, t2b, t2c); | ||
| AS t2(t2a, t2b, t2c); | ||
|
|
||
| create temporary view t3 as select * from values | ||
| CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES | ||
| (3, 1, 2) | ||
| as t3(t3a, t3b, t3c); | ||
| AS t3(t3a, t3b, t3c); | ||
|
|
||
| -- TC 01.01 | ||
| -- The column t2b in the SELECT of the subquery is invalid | ||
| -- because it is neither an aggregate function nor a GROUP BY column. | ||
| select t1a, t2b | ||
| from t1, t2 | ||
| where t1b = t2c | ||
| and t2b = (select max(avg) | ||
| from (select t2b, avg(t2b) avg | ||
| from t2 | ||
| where t2a = t1.t1b | ||
| SELECT t1a, t2b | ||
| FROM t1, t2 | ||
| WHERE t1b = t2c | ||
| AND t2b = (SELECT max(avg) | ||
| FROM (SELECT t2b, avg(t2b) avg | ||
| FROM t2 | ||
| WHERE t2a = t1.t1b | ||
| ) | ||
| ) | ||
| ; | ||
|
|
||
| -- TC 01.02 | ||
| -- Invalid due to the column t2b not part of the output from table t2. | ||
| select * | ||
| from t1 | ||
| where t1a in (select min(t2a) | ||
| from t2 | ||
| group by t2c | ||
| having t2c in (select max(t3c) | ||
| from t3 | ||
| group by t3b | ||
| having t3b > t2b )) | ||
| SELECT * | ||
| FROM t1 | ||
| WHERE t1a in (SELECT min(t2a) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: |
||
| FROM t2 | ||
| GROUP by t2c | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: |
||
| HAVING t2c IN (SELECT max(t3c) | ||
| FROM t3 | ||
| GROUP BY t3b | ||
| HAVING t3b > t2b )) | ||
| ; | ||
|
|
||
| -- TC 01.03 | ||
| -- Invalid due to a mixture of outer and local references under an AggregateExpression | ||
| -- in a correlated predicate | ||
| SELECT t1a | ||
| FROM t1 | ||
| GROUP BY 1 | ||
| HAVING EXISTS (SELECT 1 | ||
| FROM t2 | ||
| WHERE t2a < min(t1a + t2a)); | ||
|
|
||
| -- TC 01.04 | ||
| -- Invalid due to a mixture of outer and local references under an AggregateExpression | ||
| SELECT t1a | ||
| FROM t1 | ||
| WHERE t1a IN (SELECT t2a | ||
| FROM t2 | ||
| WHERE EXISTS (SELECT 1 | ||
| FROM t3 | ||
| GROUP BY 1 | ||
| HAVING min(t2a + t3a) > 1)); | ||
|
|
||
| -- TC 01.05 | ||
| -- Invalid due to outer reference appearing in projection list | ||
| SELECT t1a | ||
| FROM t1 | ||
| WHERE t1a IN (SELECT t2a | ||
| FROM t2 | ||
| WHERE EXISTS (SELECT min(t2a) | ||
| FROM t3)); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: checkMixedReferencesInsideAggregation -> checkMixedReferencesInsideAggregationExpr
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@viirya Will change it to checkMixedReferencesInsideAggregateExpr