Skip to content

Commit 789bdbe

Browse files
committed
[SPARK-20688][SQL] correctly check analysis for scalar sub-queries
## What changes were proposed in this pull request?

In `CheckAnalysis`, we should call `checkAnalysis` for `ScalarSubquery` at the beginning, as later we will call `plan.output`, which is invalid if `plan` is not resolved.

## How was this patch tested?

New regression test.

Author: Wenchen Fan <[email protected]>

Closes #17930 from cloud-fan/tmp.
1 parent b512233 commit 789bdbe

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -130,12 +130,13 @@ trait CheckAnalysis extends PredicateHelper {
130130
}
131131

132132
case s @ ScalarSubquery(query, conditions, _) =>
133+
checkAnalysis(query)
134+
133135
// If no correlation, the output must be exactly one column
134136
if (conditions.isEmpty && query.output.size != 1) {
135137
failAnalysis(
136138
s"Scalar subquery must return only one column, but got ${query.output.size}")
137-
}
138-
else if (conditions.nonEmpty) {
139+
} else if (conditions.nonEmpty) {
139140
def checkAggregate(agg: Aggregate): Unit = {
140141
// Make sure correlated scalar subqueries contain one row for every outer row by
141142
// enforcing that they are aggregates containing exactly one aggregate expression.
@@ -179,7 +180,6 @@ trait CheckAnalysis extends PredicateHelper {
179180
case fail => failAnalysis(s"Correlated scalar subqueries must be Aggregated: $fail")
180181
}
181182
}
182-
checkAnalysis(query)
183183
s
184184

185185
case s: SubqueryExpression =>

sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
7272
}
7373
}
7474

75-
test("rdd deserialization does not crash [SPARK-15791]") {
75+
test("SPARK-15791: rdd deserialization does not crash") {
7676
sql("select (select 1 as b) as b").rdd.count()
7777
}
7878

@@ -867,4 +867,12 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
867867
sql("select * from l, r where l.a = r.c + 1 AND (exists (select * from r) OR l.a = r.c)"),
868868
Row(3, 3.0, 2, 3.0) :: Row(3, 3.0, 2, 3.0) :: Nil)
869869
}
870+
871+
test("SPARK-20688: correctly check analysis for scalar sub-queries") {
872+
withTempView("t") {
873+
Seq(1 -> "a").toDF("i", "j").createTempView("t")
874+
val e = intercept[AnalysisException](sql("SELECT (SELECT count(*) FROM t WHERE a = 1)"))
875+
assert(e.message.contains("cannot resolve '`a`' given input columns: [i, j]"))
876+
}
877+
}
870878
}

0 commit comments

Comments
 (0)