Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1052,7 +1052,7 @@ class Analyzer(
case ae: AnalysisException => s
}

case f @ Filter(cond, child) if child.resolved =>
case f @ Filter(cond, child) if !f.resolved && child.resolved =>

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only added for Filter? How about Sort in the same rule?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure.

try {
val newCond = resolveExpressionRecursively(cond, child)
val requiredAttrs = newCond.references.filter(_.resolved)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ object ResolveHints {
plan match {
case u: UnresolvedRelation if toBroadcast.exists(resolver(_, u.tableIdentifier.table)) =>
BroadcastHint(plan)
case r: SubqueryAlias if toBroadcast.exists(resolver(_, r.alias)) =>
case SubqueryAlias(Some(alias), _) if toBroadcast.exists(resolver(_, alias)) =>
BroadcastHint(plan)

case _: BroadcastHint | _: View | _: With | _: SubqueryAlias =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
query.optional(ctx.ctes) {
val ctes = ctx.ctes.namedQuery.asScala.map { nCtx =>
val namedQuery = visitNamedQuery(nCtx)
(namedQuery.alias, namedQuery)
(namedQuery.alias.get, namedQuery)
}
// Check for duplicate names.
checkDuplicateKeys(ctes, ctx)
Expand Down Expand Up @@ -732,9 +732,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
* hooks.
*/
override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
plan(ctx.queryNoWith)
val query = plan(ctx.queryNoWith)
.optionalMap(ctx.sample)(withSample)
.optionalMap(ctx.strictIdentifier)(aliasPlan)
if (ctx.strictIdentifier != null) {
aliasPlan(ctx.strictIdentifier, query)
} else {
SubqueryAlias(query)
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -798,14 +798,26 @@ case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNo
}
}

/**
 * Convenience factory methods for building a [[SubqueryAlias]] without wrapping the alias in an
 * `Option` at every call site.
 */
object SubqueryAlias {
// Named subquery: the given alias is wrapped in `Some`.
def apply(alias: String, child: LogicalPlan): SubqueryAlias = SubqueryAlias(Some(alias), child)
// Anonymous subquery: no alias is recorded (`None`).
def apply(child: LogicalPlan): SubqueryAlias = SubqueryAlias(None, child)
}

/**
* Aliased subquery.
*
* @param alias the alias name for this subquery. If `None` is given, the `output` will have
* empty qualifier.
* @param child the LogicalPlan

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: the LogicalPlan -> the logical plan of this subquery

*/
case class SubqueryAlias(
alias: String,
alias: Option[String],
child: LogicalPlan)
extends UnaryNode {

override lazy val canonicalized: LogicalPlan = child.canonicalized

override def output: Seq[Attribute] = child.output.map(_.withQualifier(Some(alias)))
override def output: Seq[Attribute] = child.output.map(_.withQualifier(alias))
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
val ds2 =
sql(
"""
|SELECT * FROM (SELECT max(c1) FROM t1 GROUP BY c1)
|SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1)
|WHERE
|c1 = (SELECT max(c1) FROM t2 GROUP BY c1)
|OR
Expand Down Expand Up @@ -788,7 +788,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
// Scalar subquery and predicate subquery
sql(
"""
|SELECT * FROM (SELECT max(c1) FROM t1 GROUP BY c1)
|SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1)
|WHERE
|c1 = (SELECT max(c1) FROM t2 GROUP BY c1)
|OR
Expand All @@ -800,7 +800,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
val cachedDs2 =
sql(
"""
|SELECT * FROM (SELECT max(c1) FROM t1 GROUP BY c1)
|SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1)
|WHERE
|c1 = (SELECT max(c1) FROM t2 GROUP BY c1)
|OR
Expand Down
25 changes: 24 additions & 1 deletion sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
"""
| select c1 from onerow t1
| where exists (select 1
| from (select 1 from onerow t2 LIMIT 1)
| from (select 1 as c1 from onerow t2 LIMIT 1) t2
| where t1.c1=t2.c1)""".stripMargin),
Row(1) :: Nil)
}
Expand Down Expand Up @@ -868,6 +868,29 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
Row(3, 3.0, 2, 3.0) :: Row(3, 3.0, 2, 3.0) :: Nil)
}

test("SPARK-20690: Do not add missing attributes through subqueries") {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we still need this test? I think it's all covered by the parser test

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. We can remove this.

withTempView("onerow") {
Seq(1).toDF("c1").createOrReplaceTempView("onerow")

val e = intercept[AnalysisException] {
sql(
"""
| select 1
| from (select 1 from onerow t1 LIMIT 1)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised we support this syntax; I think a subquery in the FROM clause must have an alias.

I checked with Postgres: it throws the exception "subquery in FROM must have an alias". Can you check with other databases? Thanks!

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mysql:

mysql> select 1 from (select 1 from test);
ERROR 1248 (42000): Every derived table must have its own alias

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://cwiki.apache.org/confluence/display/Hive/LanguageManual+SubQueries

Hive supports subqueries only in the FROM clause (through Hive 0.12). The subquery has to be given a name because every table in a FROM clause must have a name.

Hive also requires an alias name.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://docs.oracle.com/cd/E17952_01/mysql-5.1-en/from-clause-subqueries.html

The [AS] name clause is mandatory, because every table in a FROM clause must have a name. Any columns in the subquery select list must have unique names.

Oracle also requires it.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @hvanhovell Shall we change the parser? I think it's hard to reason about the semantics of an anonymous subquery.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, in this change I remove the qualifier from the output of an anonymous subquery. I'm not sure whether that is what we always want.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should change the parser and require alias for subquery.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this seems confusing. Subqueries should have an alias. Let's try to add that.

| where t1.c1=1""".stripMargin)
}
assert(e.message.contains("cannot resolve '`t1.c1`'"))

checkAnswer(
sql(
"""
| select 1
| from (select 1 as c1 from onerow t1 LIMIT 1) t2
| where t2.c1=1""".stripMargin),
Row(1) :: Nil)
}
}

test("SPARK-20688: correctly check analysis for scalar sub-queries") {
withTempView("t") {
Seq(1 -> "a").toDF("i", "j").createTempView("t")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils {
spark.sql("create view vw1 as select 1 as id")
val plan = spark.sql("select id from vw1").queryExecution.analyzed
val aliases = plan.collect {
case x @ SubqueryAlias("vw1", _) => x
case x @ SubqueryAlias(Some("vw1"), _) => x
}
assert(aliases.size == 1)
}
Expand Down