@@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst._
-import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR}
+import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.encoders.OuterScopes
import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _}
@@ -4361,10 +4361,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] {
case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure =>
e.checkInputDataTypes() match {
case TypeCheckResult.TypeCheckFailure(message) =>
-e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true)
-e.failAnalysis(
-  s"cannot resolve '${e.sql}' due to data type mismatch: $message" +
-    extraHintForAnsiTypeCoercionExpression(plan))
+e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message)
}
case _ =>
})
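The key mechanism here is Spark's TreeNodeTag: instead of throwing inside the analyzer-side rule (RemoveTempResolvedColumn in this hunk), the type-check message is recorded on the offending expression and reported later by CheckAnalysis. Below is a minimal sketch of that set/read round trip, assuming spark-catalyst is on the classpath; the object name, tag constant, and message text are illustrative, not taken from the PR.

```scala
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.trees.TreeNodeTag

object TagRoundTripSketch {
  // Illustrative tag; the PR's real tag is DATA_TYPE_MISMATCH_ERROR_MESSAGE in CheckAnalysis.
  val MISMATCH_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError")

  def main(args: Array[String]): Unit = {
    val expr = Literal(1)
    // "Analyzer" side: remember the failure message instead of failing immediately.
    expr.setTagValue(MISMATCH_MESSAGE, "argument must be of type boolean")
    // "CheckAnalysis" side: read the tag back and surface the error only now.
    expr.getTagValue(MISMATCH_MESSAGE).foreach { msg =>
      println(s"cannot resolve '${expr.sql}' due to data type mismatch: $msg")
    }
  }
}
```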
@@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {

val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError")

+val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError")

protected def failAnalysis(msg: String): Nothing = {
throw new AnalysisException(msg)
}
@@ -174,7 +176,20 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
}
}

-getAllExpressions(operator).foreach(_.foreachUp {
+val expressions = getAllExpressions(operator)
+
+expressions.foreach(_.foreachUp {
+  case e: Expression =>
+    e.getTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE) match {
+      case Some(message) =>
+        e.failAnalysis(s"cannot resolve '${e.sql}' due to data type mismatch: $message" +
+          extraHintForAnsiTypeCoercionExpression(operator))
+      case _ =>
+    }
+  case _ =>
+})
+
+expressions.foreach(_.foreachUp {
case a: Attribute if !a.resolved =>
val missingCol = a.sql
val candidates = operator.inputSet.toSeq.map(_.qualifiedName)
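Note that the tagged messages are reported in a dedicated pass over the operator's expressions before the existing unresolved-attribute pass, so a deferred data-type-mismatch error still takes priority over a missing-column error on the same operator. Each pass uses foreachUp, which visits children before their parent, so the innermost failing expression is reported first. A small sketch of that ordering, assuming spark-catalyst on the classpath; the expression tree is arbitrary.

```scala
import org.apache.spark.sql.catalyst.expressions.{Add, Literal}

object ForeachUpOrderSketch {
  def main(args: Array[String]): Unit = {
    // foreachUp is a post-order traversal: children are visited before their parent,
    // so the innermost expression gets the first chance to report its error.
    val expr = Add(Literal(1), Literal(2))
    expr.foreachUp(e => println(e.getClass.getSimpleName))
    // prints: Literal, Literal, Add
  }
}
```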
@@ -1170,13 +1170,25 @@ class AnalysisSuite extends AnalysisTest with Matchers {
|WITH t as (SELECT true c, false d)
|SELECT (t.c AND t.d) c
|FROM t
-|GROUP BY t.c
+|GROUP BY t.c, t.d
[Review comment from @LuciferYang (Contributor, Author), Jun 2, 2022]
without t.d, the error message is:
  expression 't.d' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.
|HAVING ${func}(c) > 0d""".stripMargin),
Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"),
Seq(s"cannot resolve '$func(c)' due to data type mismatch"),
false)
}
}

test("SPARK-39354: should be `Table or view not found`") {
assertAnalysisError(parsePlan(
s"""
|WITH t1 as (SELECT 1 user_id, CAST("2022-06-02" AS DATE) dt)
|SELECT *
|FROM t1
|JOIN t2 ON t1.user_id = t2.user_id
|WHERE t1.dt >= DATE_SUB('2020-12-27', 90)""".stripMargin),
Seq(s"Table or view not found: t2"),
false)
}

test("SPARK-39144: nested subquery expressions deduplicate relations should be done bottom up") {
val innerRelation = SubqueryAlias("src1", testRelation)
val outerRelation = SubqueryAlias("src2", testRelation)
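For completeness, the behavior the new test pins down can also be observed end to end. A rough sketch follows, assuming a local SparkSession; the object name, master, and app name are illustrative and not part of the PR.

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

object MissingTableSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("SPARK-39354-repro").getOrCreate()
    try {
      // t2 is never created; per the new test, analysis should now fail with
      // "Table or view not found: t2" before any data-type-mismatch check on the WHERE clause.
      spark.sql(
        """
          |WITH t1 AS (SELECT 1 user_id, CAST('2022-06-02' AS DATE) dt)
          |SELECT *
          |FROM t1
          |JOIN t2 ON t1.user_id = t2.user_id
          |WHERE t1.dt >= DATE_SUB('2020-12-27', 90)
          |""".stripMargin)
    } catch {
      case e: AnalysisException => println(e.getMessage)
    } finally {
      spark.stop()
    }
  }
}
```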