diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 18684bdad63c..195c25177da8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,6 +28,7 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ +import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _} @@ -4247,7 +4248,30 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { * rule right after the main resolution batch. */ object RemoveTempResolvedColumn extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveExpressions { - case t: TempResolvedColumn => UnresolvedAttribute(t.nameParts) + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.foreachUp { + // A HAVING clause is resolved as a Filter. When it calls func(column with wrong data type), + // the column may be wrapped in a TempResolvedColumn, e.g. mean(tempresolvedcolumn(t.c)). + // TempResolvedColumn still reports its child's data type, so this is a chance to check the + // function's input types before the column is turned back into an UnresolvedAttribute below. + // Fail analysis with a data type mismatch error when that check fails. 
+ case operator: Filter => + operator.expressions.foreach(_.foreachUp { + case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure => + e.checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(message) => + e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true) + e.failAnalysis( + s"cannot resolve '${e.sql}' due to data type mismatch: $message" + + extraHintForAnsiTypeCoercionExpression(plan)) + } + case _ => + }) + case _ => + } + + plan.resolveExpressions { + case t: TempResolvedColumn => UnresolvedAttribute(t.nameParts) + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index d06996a09df0..2da2686bdc84 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -623,7 +623,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } } - private def extraHintForAnsiTypeCoercionExpression(plan: LogicalPlan): String = { + private[analysis] def extraHintForAnsiTypeCoercionExpression(plan: LogicalPlan): String = { if (!SQLConf.get.ansiEnabled) { "" } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index b861e5df72c3..c8ef71eb8b89 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -645,4 +645,5 @@ case class TempResolvedColumn(child: Expression, nameParts: Seq[String]) extends override def dataType: DataType = child.dataType override protected def withNewChildInternal(newChild: Expression): Expression = copy(child = newChild) + override def sql: String = child.sql } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 63f90a8d6b88..ff05b797e7cd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1150,4 +1150,28 @@ class AnalysisSuite extends AnalysisTest with Matchers { "MISSING_COLUMN", Array("c.y", "x")) } + + test("SPARK-38118: Func(wrong_type) in the HAVING clause should throw data mismatch error") { + Seq("mean", "abs").foreach { func => + assertAnalysisError(parsePlan( + s""" + |WITH t as (SELECT true c) + |SELECT t.c + |FROM t + |GROUP BY t.c + |HAVING ${func}(t.c) > 0d""".stripMargin), + Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"), + false) + + assertAnalysisError(parsePlan( + s""" + |WITH t as (SELECT true c, false d) + |SELECT (t.c AND t.d) c + |FROM t + |GROUP BY t.c + |HAVING ${func}(c) > 0d""".stripMargin), + Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"), + false) + } + } }