Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ identifierComment
;

relationPrimary
: tableIdentifier sample? (AS? strictIdentifier)? #tableName
: tableIdentifier sample? tableAlias #tableName
| '(' queryNoWith ')' sample? (AS? strictIdentifier) #aliasedQuery
| '(' relation ')' sample? (AS? strictIdentifier)? #aliasedRelation
| inlineTable #inlineTableDefault2
Expand Down Expand Up @@ -711,7 +711,7 @@ nonReserved
| ADD
| OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | LAST | FIRST | AFTER
| MAP | ARRAY | STRUCT
| LATERAL | WINDOW | REDUCE | TRANSFORM | USING | SERDE | SERDEPROPERTIES | RECORDREADER
| LATERAL | WINDOW | REDUCE | TRANSFORM | SERDE | SERDEPROPERTIES | RECORDREADER
| DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED
| EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | GLOBAL | TEMPORARY | OPTIONS
| GROUPING | CUBE | ROLLUP
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,25 @@ class Analyzer(
def resolveRelation(plan: LogicalPlan): LogicalPlan = plan match {
case u: UnresolvedRelation if !isRunningDirectlyOnFiles(u.tableIdentifier) =>
val defaultDatabase = AnalysisContext.get.defaultDatabase
val relation = lookupTableFromCatalog(u, defaultDatabase)
val foundRelation = lookupTableFromCatalog(u, defaultDatabase)

// Add `Project` to rename output column names if a query has alias names:
// e.g., SELECT col1, col2 FROM testData AS t(col1, col2)
val relation = if (u.outputColumnNames.nonEmpty) {
val outputAttrs = foundRelation.output
// Checks if the number of the aliases is equal to the number of columns in the table.
if (u.outputColumnNames.size != outputAttrs.size) {
u.failAnalysis(s"Number of column aliases does not match number of columns. " +
s"Table name: ${u.tableName}; number of column aliases: " +
s"${u.outputColumnNames.size}; number of columns: ${outputAttrs.size}.")
}
val aliases = outputAttrs.zip(u.outputColumnNames).map {
case (attr, name) => Alias(attr, name)()
}
Project(aliases, foundRelation)
} else {
foundRelation
}
resolveRelation(relation)
// The view's child should be a logical plan parsed from the `desc.viewText`, the variable
// `viewText` should be defined, or else we throw an error on the generation of the View
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,9 @@ object ResolveTableValuedFunctions extends Rule[LogicalPlan] {
val outputAttrs = resolvedFunc.output
// Checks if the number of the aliases is equal to expected one
if (u.outputNames.size != outputAttrs.size) {
u.failAnalysis(s"expected ${outputAttrs.size} columns but " +
s"found ${u.outputNames.size} columns")
u.failAnalysis(s"Number of given aliases does not match number of output columns. " +
s"Function name: ${u.functionName}; number of aliases: " +
s"${u.outputNames.size}; number of output columns: ${outputAttrs.size}.")
}
val aliases = outputAttrs.zip(u.outputNames).map {
case (attr, name) => Alias(attr, name)()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,21 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str

/**
* Holds the name of a relation that has yet to be looked up in a catalog.
* Alias names can be assigned to the columns of a relation:
* {{{
* // Assign alias names
* SELECT col1, col2 FROM testData AS t(col1, col2);
* }}}
*
* @param tableIdentifier table name
* @param outputColumnNames alias names of columns. If these names are given, the analyzer adds
* [[Project]] to rename the columns.
*/
case class UnresolvedRelation(tableIdentifier: TableIdentifier) extends LeafNode {
case class UnresolvedRelation(
tableIdentifier: TableIdentifier,
outputColumnNames: Seq[String] = Seq.empty)
extends LeafNode {

/** Returns a `.` separated name for this relation. */
def tableName: String = tableIdentifier.unquotedString

Expand Down Expand Up @@ -71,6 +84,11 @@ case class UnresolvedInlineTable(
* // Assign alias names
* select t.a from range(10) t(a);
* }}}
*
* @param functionName name of this table-value function
* @param functionArgs list of function arguments
* @param outputNames alias names of function output columns. If these names are given, the
* analyzer adds [[Project]] to rename the output columns.
*/
case class UnresolvedTableValuedFunction(
functionName: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -676,12 +676,16 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
* Create an aliased table reference. This is typically used in FROM clauses.
*/
override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) {
// NOTE(review): this span appears to show both sides of the diff hunk back to back. The first
// `table` / `tableWithAlias` pair (which reads `ctx.strictIdentifier` directly) looks like the
// removed implementation, and the pair that goes through `ctx.tableAlias` looks like its
// replacement -- confirm against the actual file before relying on either.
val table = UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier))

val tableWithAlias = Option(ctx.strictIdentifier).map(_.getText) match {
case Some(strictIdentifier) =>
SubqueryAlias(strictIdentifier, table)
case _ => table
val tableId = visitTableIdentifier(ctx.tableIdentifier)
// When the alias carries an identifier list, e.g. `t(col1, col2)`, pass those names to the
// UnresolvedRelation; otherwise build the relation from the table identifier alone.
val table = if (ctx.tableAlias.identifierList != null) {
UnresolvedRelation(tableId, visitIdentifierList(ctx.tableAlias.identifierList))
} else {
UnresolvedRelation(tableId)
}
// Wrap the relation in a SubqueryAlias only when an alias name was actually written.
val tableWithAlias = if (ctx.tableAlias.strictIdentifier != null) {
SubqueryAlias(ctx.tableAlias.strictIdentifier.getText, table)
} else {
table
}
// Apply `withSample` via `optionalMap` using the optional `sample` clause of the context.
tableWithAlias.optionalMap(ctx.sample)(withSample)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ case class ResolvedHint(child: LogicalPlan, hints: HintInfo = HintInfo())

override def output: Seq[Attribute] = child.output

override lazy val canonicalized: LogicalPlan = child.canonicalized

override def computeStats(conf: SQLConf): Statistics = {
val stats = child.stats(conf)
stats.copy(hints = hints)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,23 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers {
assertAnalysisSuccess(rangeWithAliases(2 :: 6 :: 2 :: Nil, "c" :: Nil))
assertAnalysisError(
rangeWithAliases(3 :: Nil, "a" :: "b" :: Nil),
Seq("expected 1 columns but found 2 columns"))
Seq("Number of given aliases does not match number of output columns. "
+ "Function name: range; number of aliases: 2; number of output columns: 1."))
}

test("SPARK-20841 Support table column aliases in FROM clause") {
  // Builds the unresolved plan for a star-select over `TaBlE3` aliased as `t`, with the given
  // column aliases attached to the relation.
  def selectStarFromAliased(columnAliases: Seq[String]): LogicalPlan = {
    val aliasedRelation = UnresolvedRelation(TableIdentifier("TaBlE3"), columnAliases)
    SubqueryAlias("t", aliasedRelation).select(star())
  }

  // Four aliases: the expected error messages below report four columns for this table.
  val matchingAliases = Seq("col1", "col2", "col3", "col4")
  assertAnalysisSuccess(selectStarFromAliased(matchingAliases))

  // Fewer aliases than columns.
  val tooFewAliasesError = Seq(
    "Number of column aliases does not match number of columns. Table name: TaBlE3; " +
      "number of column aliases: 1; number of columns: 4.")
  assertAnalysisError(selectStarFromAliased(Seq("col1")), tooFewAliasesError)

  // More aliases than columns.
  val tooManyAliasesError = Seq(
    "Number of column aliases does not match number of columns. Table name: TaBlE3; " +
      "number of column aliases: 5; number of columns: 4.")
  assertAnalysisError(selectStarFromAliased(matchingAliases :+ "col5"), tooManyAliasesError)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ trait AnalysisTest extends PlanTest {
val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true)
new Analyzer(catalog, conf) {
override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

package org.apache.spark.sql.catalyst.parser

import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{UnresolvedGenerator, UnresolvedInlineTable, UnresolvedTableValuedFunction}
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
Expand Down Expand Up @@ -493,6 +493,13 @@ class PlanParserSuite extends PlanTest {
.select(star()))
}

test("SPARK-20841 Support table column aliases in FROM clause") {
  // The column alias list written after the table alias is expected on the UnresolvedRelation,
  // while the alias name itself becomes the enclosing SubqueryAlias.
  val sqlText = "SELECT * FROM testData AS t(col1, col2)"
  val aliasedRelation = UnresolvedRelation(TableIdentifier("testData"), Seq("col1", "col2"))
  val expectedPlan = SubqueryAlias("t", aliasedRelation).select(star())
  assertEqual(sqlText, expectedPlan)
}

test("inline table") {
assertEqual("values 1, 2, 3, 4",
UnresolvedInlineTable(Seq("col1"), Seq(1, 2, 3, 4).map(x => Seq(Literal(x)))))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class TableIdentifierParserSuite extends SparkFunSuite {
"insert", "int", "into", "is", "lateral", "like", "local", "none", "null",
"of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
"rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
"true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike",
"true", "truncate", "update", "user", "values", "with", "regexp", "rlike",
"bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
"int", "smallint", "timestamp", "at")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Union}
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, ResolvedHint, Union}
import org.apache.spark.sql.catalyst.util._

/**
Expand Down Expand Up @@ -66,4 +66,10 @@ class SameResultSuite extends SparkFunSuite {
assertSameResult(Union(Seq(testRelation, testRelation2)),
Union(Seq(testRelation2, testRelation)))
}

test("hint") {
  // A join whose right side is wrapped in a ResolvedHint should be reported as producing the
  // same result as the plain join.
  val hintedJoin = testRelation.join(ResolvedHint(testRelation))
  val plainJoin = testRelation.join(testRelation)
  assertSameResult(hintedJoin, plainJoin)
}
}
17 changes: 17 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Test data.
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS testData(a, b);

-- Table column aliases in FROM clause
SELECT * FROM testData AS t(col1, col2) WHERE col1 = 1;

SELECT * FROM testData AS t(col1, col2) WHERE col1 = 2;

SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k;

-- Aliasing the wrong number of columns in the FROM clause
SELECT * FROM testData AS t(col1, col2, col3);

SELECT * FROM testData AS t(col1);

-- Check that the original column names (a, b) cannot be referenced once column aliases (c, d) are assigned
SELECT a AS col1, b AS col2 FROM testData AS t(c, d);
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 7


-- !query 0
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS testData(a, b)
-- !query 0 schema
struct<>
-- !query 0 output



-- !query 1
SELECT * FROM testData AS t(col1, col2) WHERE col1 = 1
-- !query 1 schema
struct<col1:int,col2:int>
-- !query 1 output
1 1
1 2


-- !query 2
SELECT * FROM testData AS t(col1, col2) WHERE col1 = 2
-- !query 2 schema
struct<col1:int,col2:int>
-- !query 2 output
2 1


-- !query 3
SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k
-- !query 3 schema
struct<k:int,sum(col2):bigint>
-- !query 3 output
1 3
2 1


-- !query 4
SELECT * FROM testData AS t(col1, col2, col3)
-- !query 4 schema
struct<>
-- !query 4 output
org.apache.spark.sql.AnalysisException
Number of column aliases does not match number of columns. Table name: testData; number of column aliases: 3; number of columns: 2.; line 1 pos 14


-- !query 5
SELECT * FROM testData AS t(col1)
-- !query 5 schema
struct<>
-- !query 5 output
org.apache.spark.sql.AnalysisException
Number of column aliases does not match number of columns. Table name: testData; number of column aliases: 1; number of columns: 2.; line 1 pos 14


-- !query 6
SELECT a AS col1, b AS col2 FROM testData AS t(c, d)
-- !query 6 schema
struct<>
-- !query 6 output
org.apache.spark.sql.AnalysisException
cannot resolve '`a`' given input columns: [c, d]; line 1 pos 7
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ object TPCDSQueryBenchmark {
// per-row processing time for those cases.
val queryRelations = scala.collection.mutable.HashSet[String]()
spark.sql(queryString).queryExecution.logical.map {
case ur @ UnresolvedRelation(t: TableIdentifier) =>
case UnresolvedRelation(t: TableIdentifier, _) =>
queryRelations.add(t.table)
case lp: LogicalPlan =>
lp.expressions.foreach { _ foreach {
case subquery: SubqueryExpression =>
subquery.plan.foreach {
case ur @ UnresolvedRelation(t: TableIdentifier) =>
case UnresolvedRelation(t: TableIdentifier, _) =>
queryRelations.add(t.table)
case _ =>
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -681,9 +681,11 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
}
}

// construct Spark's statistics from information in Hive metastore
// Restore Spark's statistics from information in Metastore.
val statsProps = table.properties.filterKeys(_.startsWith(STATISTICS_PREFIX))

// Currently we have two sources of statistics: one from Hive and the other from Spark.
// In our design, if Spark's statistics are available, we prefer them over Hive's statistics.
if (statsProps.nonEmpty) {
val colStats = new mutable.HashMap[String, ColumnStat]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,8 @@ private[hive] class HiveClientImpl(
}
val comment = properties.get("comment")

// Here we are reading statistics from Hive.
// Note that these statistics could be overridden by Spark's statistics if those are available.
val totalSize = properties.get(StatsSetupConst.TOTAL_SIZE).map(BigInt(_))
val rawDataSize = properties.get(StatsSetupConst.RAW_DATA_SIZE).map(BigInt(_))
val rowCount = properties.get(StatsSetupConst.ROW_COUNT).map(BigInt(_)).filter(_ >= 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ private[hive] class TestHiveQueryExecution(
// Make sure any test tables referenced are loaded.
val referencedTables =
describedTables ++
logical.collect { case UnresolvedRelation(tableIdent) => tableIdent.table }
logical.collect { case UnresolvedRelation(tableIdent, _) => tableIdent.table }
val referencedTestTables = referencedTables.filter(sparkSession.testTables.contains)
logDebug(s"Query references test tables: ${referencedTestTables.mkString(", ")}")
referencedTestTables.foreach(sparkSession.loadTestTable)
Expand Down