Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ ctes
;

namedQuery
: name=identifier AS? '(' query ')'
: name=identifier (columnAliases=identifierList)? AS? '(' query ')'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate names within a single CTE definition are not allowed.

;

tableProvider
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
* This is only used for Common Table Expressions.
*/
override def visitNamedQuery(ctx: NamedQueryContext): SubqueryAlias = withOrigin(ctx) {
SubqueryAlias(ctx.name.getText, plan(ctx.query))
val subQuery: LogicalPlan = plan(ctx.query).optionalMap(ctx.columnAliases)(
(columnAliases, plan) =>
UnresolvedSubqueryColumnAliases(visitIdentifierList(columnAliases), plan)
)
SubqueryAlias(ctx.name.getText, subQuery)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
import org.apache.spark.sql.catalyst.plans.{Cross, Inner}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning,
Expand Down Expand Up @@ -633,4 +634,15 @@ class AnalysisSuite extends AnalysisTest with Matchers {
val res = ViewAnalyzer.execute(view)
comparePlans(res, expected)
}

test("CTE with non-existing column alias") {
assertAnalysisError(parsePlan("WITH t(x) AS (SELECT 1) SELECT * FROM t WHERE y = 1"),
Seq("cannot resolve '`y`' given input columns: [x]"))
}

test("CTE with non-matching column alias") {
assertAnalysisError(parsePlan("WITH t(x, y) AS (SELECT 1) SELECT * FROM t WHERE x = 1"),
Seq("Number of column aliases does not match number of columns. Number of column aliases: " +
"2; number of columns: 1."))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,31 @@ class PlanParserSuite extends AnalysisTest {
assertEqual("select * from a intersect all select * from b", a.intersect(b, isAll = true))
}

private def cte(plan: LogicalPlan, namedPlans: (String, (LogicalPlan, Seq[String]))*): With = {
val ctes = namedPlans.map {
case (name, (cte, columnAliases)) =>
val subquery = if (columnAliases.isEmpty) {
cte
} else {
UnresolvedSubqueryColumnAliases(columnAliases, cte)
}
name -> SubqueryAlias(name, subquery)
}
With(plan, ctes)
}

test("common table expressions") {
assertEqual(
"with cte1 as (select * from a) select * from cte1",
cte(table("cte1").select(star()), "cte1" -> table("a").select(star())))
cte(table("cte1").select(star()), "cte1" -> ((table("a").select(star()), Seq.empty))))
assertEqual(
"with cte1 (select 1) select * from cte1",
cte(table("cte1").select(star()), "cte1" -> OneRowRelation().select(1)))
cte(table("cte1").select(star()), "cte1" -> ((OneRowRelation().select(1), Seq.empty))))
assertEqual(
"with cte1 (select 1), cte2 as (select * from cte1) select * from cte2",
cte(table("cte2").select(star()),
"cte1" -> OneRowRelation().select(1),
"cte2" -> table("cte1").select(star())))
"cte1" -> ((OneRowRelation().select(1), Seq.empty)),
"cte2" -> ((table("cte1").select(star()), Seq.empty))))
intercept(
"with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1",
"Found duplicate keys 'cte1'")
Expand Down Expand Up @@ -818,4 +831,10 @@ class PlanParserSuite extends AnalysisTest {
"SELECT /*+ BROADCAST(tab) */ * FROM testcat.db.tab",
table("testcat", "db", "tab").select(star()).hint("BROADCAST", $"tab"))
}

test("CTE with column alias") {
assertEqual(
"WITH t(x) AS (SELECT c FROM a) SELECT * FROM t",
cte(table("t").select(star()), "t" -> ((table("a").select('c), Seq("x")))))
}
}
4 changes: 4 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/cte.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ SELECT t1.id AS c1,
FROM CTE1 t1
CROSS JOIN CTE1 t2;

-- CTE with column alias
WITH t(x) AS (SELECT 1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add more test cases? For example, can a with clause in a subquery shadow a with clause in an enclosing query with the same name? Another example, use with clauses in a subquery expression?

Copy link
Contributor Author

@peter-toth peter-toth Jun 13, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to focus on column aliases in this PR, but I have another open here which focuses on nested WITH clauses: #24831

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep. For the nested WITH, #24831 would be a better place to add them.
After merging this, let's proceed as @gatorsmile suggested, @peter-toth .

SELECT * FROM t WHERE x = 1;

-- Clean up
DROP VIEW IF EXISTS t;
DROP VIEW IF EXISTS t2;
19 changes: 14 additions & 5 deletions sql/core/src/test/resources/sql-tests/results/cte.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 9
-- Number of queries: 10


-- !query 0
Expand Down Expand Up @@ -89,16 +89,25 @@ struct<c1:int,c2:int>


-- !query 7
DROP VIEW IF EXISTS t
WITH t(x) AS (SELECT 1)
SELECT * FROM t WHERE x = 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Almost all the test cases are using one column in the CTE definition. Can you try your best to improve the current test coverage in this new syntax?

-- !query 7 schema
struct<>
struct<x:int>
-- !query 7 output

1


-- !query 8
DROP VIEW IF EXISTS t2
DROP VIEW IF EXISTS t
-- !query 8 schema
struct<>
-- !query 8 output



-- !query 9
DROP VIEW IF EXISTS t2
-- !query 9 schema
struct<>
-- !query 9 output