Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import scala.util.{Failure, Random, Success, Try}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst._
import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.encoders.OuterScopes
import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _}
Expand Down Expand Up @@ -4247,7 +4248,30 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] {
* rule right after the main resolution batch.
*/
object RemoveTempResolvedColumn extends Rule[LogicalPlan] {
override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveExpressions {
case t: TempResolvedColumn => UnresolvedAttribute(t.nameParts)
override def apply(plan: LogicalPlan): LogicalPlan = {
plan.foreachUp {
// HAVING clause will be resolved as a Filter. When having func(column with wrong data type),
// the column could be wrapped by a TempResolvedColumn, e.g. mean(tempresolvedcolumn(t.c)).
// Because TempResolvedColumn can still preserve column data type, here is a chance to check
// if the data type matches with the required data type of the function. We can throw an error
// when data types mismatches.
case operator: Filter =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about handling TempResolvedColumn in CheckAnalysis? For example:

// Check argument data types of higher-order functions downwards first.
// If the arguments of the higher-order functions are resolved but the type check fails,
// the argument functions will not get resolved, but we should report the argument type
// check failure instead of claiming the argument functions are unresolved.
operator transformExpressionsDown {
case hof: HigherOrderFunction

Are there new issues if we keep TempResolvedColumn during the analysis?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #35404 (comment).

Unfortunately I don't have enough knowledge why removing TempResolvedColumn was introduced (it was introduced in #32470)

operator.expressions.foreach(_.foreachUp {
case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure =>
e.checkInputDataTypes() match {
case TypeCheckResult.TypeCheckFailure(message) =>
e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true)
e.failAnalysis(
s"cannot resolve '${e.sql}' due to data type mismatch: $message" +
extraHintForAnsiTypeCoercionExpression(plan))
}
case _ =>
})
case _ =>
}

plan.resolveExpressions {
case t: TempResolvedColumn => UnresolvedAttribute(t.nameParts)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
}
}

private def extraHintForAnsiTypeCoercionExpression(plan: LogicalPlan): String = {
private[analysis] def extraHintForAnsiTypeCoercionExpression(plan: LogicalPlan): String = {
if (!SQLConf.get.ansiEnabled) {
""
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -645,4 +645,5 @@ case class TempResolvedColumn(child: Expression, nameParts: Seq[String]) extends
override def dataType: DataType = child.dataType
override protected def withNewChildInternal(newChild: Expression): Expression =
copy(child = newChild)
override def sql: String = child.sql
}
Original file line number Diff line number Diff line change
Expand Up @@ -1150,4 +1150,28 @@ class AnalysisSuite extends AnalysisTest with Matchers {
"MISSING_COLUMN",
Array("c.y", "x"))
}

test("SPARK-38118: Func(wrong_type) in the HAVING clause should throw data mismatch error") {
Seq("mean", "abs").foreach { func =>
assertAnalysisError(parsePlan(
s"""
|WITH t as (SELECT true c)
|SELECT t.c
|FROM t
|GROUP BY t.c
|HAVING ${func}(t.c) > 0d""".stripMargin),
Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"),
false)

assertAnalysisError(parsePlan(
s"""
|WITH t as (SELECT true c, false d)
|SELECT (t.c AND t.d) c
|FROM t
|GROUP BY t.c
|HAVING ${func}(c) > 0d""".stripMargin),
Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"),
false)
}
}
}