diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b13dede2acc4..3017fc10dfd4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR} +import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _} @@ -4361,10 +4361,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] { case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure => e.checkInputDataTypes() match { case TypeCheckResult.TypeCheckFailure(message) => - e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true) - e.failAnalysis( - s"cannot resolve '${e.sql}' due to data type mismatch: $message" + - extraHintForAnsiTypeCoercionExpression(plan)) + e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message) } case _ => }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 95b0226f00de..ed2e9ba2b6bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { val 
DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError") + val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchErrorMessage") + protected def failAnalysis(msg: String): Nothing = { throw new AnalysisException(msg) } @@ -174,7 +176,20 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } } - getAllExpressions(operator).foreach(_.foreachUp { + val expressions = getAllExpressions(operator) + + expressions.foreach(_.foreachUp { + case e: Expression => + e.getTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE) match { + case Some(message) => + e.failAnalysis(s"cannot resolve '${e.sql}' due to data type mismatch: $message" + + extraHintForAnsiTypeCoercionExpression(operator)) + case _ => + } + case _ => + }) + + expressions.foreach(_.foreachUp { case a: Attribute if !a.resolved => val missingCol = a.sql val candidates = operator.inputSet.toSeq.map(_.qualifiedName) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 84f9c6c5e761..a6e952fd8657 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1170,13 +1170,25 @@ class AnalysisSuite extends AnalysisTest with Matchers { |WITH t as (SELECT true c, false d) |SELECT (t.c AND t.d) c |FROM t - |GROUP BY t.c + |GROUP BY t.c, t.d |HAVING ${func}(c) > 0d""".stripMargin), - Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"), + Seq(s"cannot resolve '$func(c)' due to data type mismatch"), false) } } + test("SPARK-39354: should be `Table or view not found`") { + assertAnalysisError(parsePlan( + s""" + |WITH t1 as (SELECT 1 user_id, CAST("2022-06-02" AS DATE) dt) + |SELECT * + |FROM t1 + |JOIN t2 ON t1.user_id = t2.user_id + |WHERE t1.dt >= DATE_SUB('2020-12-27', 90)""".stripMargin), + Seq(s"Table or 
view not found: t2"), + false) + } + test("SPARK-39144: nested subquery expressions deduplicate relations should be done bottom up") { val innerRelation = SubqueryAlias("src1", testRelation) val outerRelation = SubqueryAlias("src2", testRelation)