diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index e02b9e9e460f..cd5e18873f7e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -114,12 +114,22 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
   }
 
   def checkAnalysis(plan: LogicalPlan): Unit = {
-    // We transform up and order the rules so as to catch the first possible failure instead
-    // of the result of cascading resolution failures. Inline all CTEs in the plan to help check
-    // query plan structures in subqueries.
     val inlineCTE = InlineCTE(alwaysInline = true)
-    inlineCTE(plan).foreachUp {
+    val cteMap = mutable.HashMap.empty[Long, (CTERelationDef, Int)]
+    inlineCTE.buildCTEMap(plan, cteMap)
+    cteMap.values.foreach { case (relation, refCount) =>
+      // If a CTE relation is never used, it will disappear after inline. Here we explicitly check
+      // analysis for it, to make sure the entire query plan is valid.
+      if (refCount == 0) checkAnalysis0(relation.child)
+    }
+    // Inline all CTEs in the plan to help check query plan structures in subqueries.
+    checkAnalysis0(inlineCTE(plan))
+  }
+  def checkAnalysis0(plan: LogicalPlan): Unit = {
+    // We transform up and order the rules so as to catch the first possible failure instead
+    // of the result of cascading resolution failures.
+    plan.foreachUp {
 
       case p if p.analyzed => // Skip already analyzed sub-plans
 
       case leaf: LeafNode if leaf.output.map(_.dataType).exists(CharVarcharUtils.hasCharVarchar) =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala
index a740b92933fa..3380b52832bd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala
@@ -68,7 +68,7 @@ case class InlineCTE(alwaysInline: Boolean = false) extends Rule[LogicalPlan] {
     cteDef.child.exists(_.expressions.exists(_.isInstanceOf[OuterReference]))
   }
 
-  private def buildCTEMap(
+  def buildCTEMap(
       plan: LogicalPlan,
       cteMap: mutable.HashMap[Long, (CTERelationDef, Int)]): Unit = {
     plan match {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql
index ec2f4808fcfc..a3276330667d 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql
@@ -49,6 +49,10 @@ WITH t(x) AS (SELECT 2)
 SELECT * FROM t;
 
+-- invalid CTE relation should fail the query even if it's not referenced
+WITH t AS (SELECT 1 FROM non_existing_table)
+SELECT 2;
+
 -- Clean up
 DROP VIEW IF EXISTS t;
 DROP VIEW IF EXISTS t2;
diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out
index 503ddd214d95..eb57b49119c9 100644
--- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out
@@ -163,6 +163,16 @@ org.apache.spark.sql.catalyst.parser.ParseException
 }
 
 
+-- !query
+WITH t AS (SELECT 1 FROM non_existing_table)
+SELECT 2
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Table or view not found: non_existing_table; line 1 pos 25
+
+
 -- !query
 DROP VIEW IF EXISTS t
 -- !query schema
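
For illustration, a minimal standalone sketch of the behavior exercised by the new cte.sql test case. This is not part of the patch; it assumes a local SparkSession, and the object name, app name and master URL are made up:

// Minimal sketch of the user-visible effect of the change above.
import org.apache.spark.sql.{AnalysisException, SparkSession}

object UnreferencedCteCheckExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("unreferenced-cte-check-example")
      .master("local[*]")
      .getOrCreate()
    try {
      // The CTE `t` is never referenced, and its body reads a non-existent table.
      // With checkAnalysis now validating unreferenced CTE relations, this query
      // fails analysis with "Table or view not found: non_existing_table" instead
      // of the invalid definition being dropped by CTE inlining.
      spark.sql("WITH t AS (SELECT 1 FROM non_existing_table) SELECT 2")
    } catch {
      case e: AnalysisException =>
        println(s"Analysis failed as expected: ${e.getMessage}")
    } finally {
      spark.stop()
    }
  }
}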