Commit 62c4361

LuciferYang authored and SandishKumarHN committed
[SPARK-40371][SQL] Migrate type check failures of NthValue and NTile onto error classes
### What changes were proposed in this pull request?
This PR replaces `TypeCheckFailure` with `DataTypeMismatch` in the type checks of the window expressions `NthValue` and `NTile`.

### Why are the changes needed?
Migration onto error classes unifies Spark SQL error messages.

### Does this PR introduce _any_ user-facing change?
Yes. The PR changes user-facing error messages.

### How was this patch tested?
Pass GitHub Actions.

Closes apache#38457 from LuciferYang/SPARK-40371.

Authored-by: yangjie01 <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
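Before diving into the diff, it may help to see the shape of the migration in isolation. Below is a minimal, self-contained Scala model: the trait and case-class names mirror Spark's `TypeCheckResult` hierarchy, but everything here is illustrative rather than Spark's actual implementation.

```scala
// Illustrative model only: Spark's real TypeCheckResult lives in
// org.apache.spark.sql.catalyst.analysis; the names below just mirror it.
sealed trait TypeCheckResult
case object TypeCheckSuccess extends TypeCheckResult
// Legacy shape: a single free-form string, hard to unify or translate.
final case class TypeCheckFailure(message: String) extends TypeCheckResult
// New shape: a machine-readable error subclass plus named parameters that
// the error-class framework substitutes into a shared message template.
final case class DataTypeMismatch(
    errorSubClass: String,
    messageParameters: Map[String, String]) extends TypeCheckResult

// The nth_value offset check, restated in this model.
def checkOffset(offsetVal: Long): TypeCheckResult =
  if (offsetVal > 0) {
    TypeCheckSuccess
  } else {
    DataTypeMismatch(
      errorSubClass = "VALUE_OUT_OF_RANGE",
      messageParameters = Map(
        "exprName" -> "offset",
        "valueRange" -> s"(0, ${Long.MaxValue}]",
        "currentValue" -> s"${offsetVal}L"))
  }

println(checkOffset(0L))
// DataTypeMismatch(VALUE_OUT_OF_RANGE,Map(exprName -> offset, ...))
```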
1 parent db714bb commit 62c4361

3 files changed: 61 additions, 20 deletions


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala

Lines changed: 44 additions & 10 deletions
@@ -20,13 +20,13 @@ package org.apache.spark.sql.catalyst.expressions
 import java.util.Locale
 
 import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedException}
-import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckFailure, TypeCheckSuccess}
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess}
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions.Cast.{toSQLExpr, toSQLType}
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateFunction, DeclarativeAggregate, NoOp}
 import org.apache.spark.sql.catalyst.trees.{BinaryLike, LeafLike, TernaryLike, UnaryLike}
 import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UNRESOLVED_WINDOW_EXPRESSION, WINDOW_EXPRESSION}
-import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors}
 import org.apache.spark.sql.types._
 
 /**
@@ -709,7 +709,7 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction with Le
 // scalastyle:on line.size.limit line.contains.tab
 case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean)
   extends AggregateWindowFunction with OffsetWindowFunction with ImplicitCastInputTypes
-  with BinaryLike[Expression] {
+  with BinaryLike[Expression] with QueryErrorsBase {
 
   def this(child: Expression, offset: Expression) = this(child, offset, false)
 
@@ -729,10 +729,23 @@ case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean)
     if (check.isFailure) {
       check
     } else if (!offset.foldable) {
-      TypeCheckFailure(s"Offset expression '$offset' must be a literal.")
+      DataTypeMismatch(
+        errorSubClass = "NON_FOLDABLE_INPUT",
+        messageParameters = Map(
+          "inputName" -> "offset",
+          "inputType" -> toSQLType(offset.dataType),
+          "inputExpr" -> toSQLExpr(offset)
+        )
+      )
     } else if (offsetVal <= 0) {
-      TypeCheckFailure(
-        s"The 'offset' argument of nth_value must be greater than zero but it is $offsetVal.")
+      DataTypeMismatch(
+        errorSubClass = "VALUE_OUT_OF_RANGE",
+        messageParameters = Map(
+          "exprName" -> "offset",
+          "valueRange" -> s"(0, ${Long.MaxValue}]",
+          "currentValue" -> toSQLValue(offsetVal, LongType)
+        )
+      )
     } else {
       TypeCheckSuccess
     }
@@ -815,7 +828,7 @@ case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean)
   group = "window_funcs")
 // scalastyle:on line.size.limit line.contains.tab
 case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindowFunction
-  with UnaryLike[Expression] {
+  with UnaryLike[Expression] with QueryErrorsBase {
 
   def this() = this(Literal(1))
 
@@ -825,18 +838,39 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow
   // for each partition.
   override def checkInputDataTypes(): TypeCheckResult = {
     if (!buckets.foldable) {
-      return TypeCheckFailure(s"Buckets expression must be foldable, but got $buckets")
+      return DataTypeMismatch(
+        errorSubClass = "NON_FOLDABLE_INPUT",
+        messageParameters = Map(
+          "inputName" -> "buckets",
+          "inputType" -> toSQLType(buckets.dataType),
+          "inputExpr" -> toSQLExpr(buckets)
+        )
+      )
     }
 
     if (buckets.dataType != IntegerType) {
-      return TypeCheckFailure(s"Buckets expression must be integer type, but got $buckets")
+      return DataTypeMismatch(
+        errorSubClass = "UNEXPECTED_INPUT_TYPE",
+        messageParameters = Map(
+          "paramIndex" -> "1",
+          "requiredType" -> toSQLType(IntegerType),
+          "inputSql" -> toSQLExpr(buckets),
+          "inputType" -> toSQLType(buckets.dataType))
+      )
     }
 
     val i = buckets.eval().asInstanceOf[Int]
     if (i > 0) {
       TypeCheckSuccess
     } else {
-      TypeCheckFailure(s"Buckets expression must be positive, but got: $i")
+      DataTypeMismatch(
+        errorSubClass = "VALUE_OUT_OF_RANGE",
+        messageParameters = Map(
+          "exprName" -> "buckets",
+          "valueRange" -> s"(0, ${Int.MaxValue}]",
+          "currentValue" -> toSQLValue(i, IntegerType)
+        )
+      )
     }
   }
 
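Note that the checks above only produce structured data; they never build a user-facing string. The DATATYPE_MISMATCH error class owns the message template, and the analyzer attaches the offending expression's SQL (the `sqlExpr` parameter seen in the tests and golden files below) before rendering. A rough, self-contained sketch of that substitution step, assuming a `<param>`-placeholder template in the style of Spark's error-classes JSON (the template wording here is illustrative, not Spark's exact text):

```scala
// Sketch of how an error-class framework can render a DataTypeMismatch.
// The template text is illustrative, not Spark's exact wording; Spark keeps
// its real templates in error-classes.json using <param> placeholders.
val templates = Map(
  "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE" ->
    "The <exprName> must be between <valueRange> (current value = <currentValue>)")

def render(errorClass: String, params: Map[String, String]): String =
  params.foldLeft(templates(errorClass)) {
    case (msg, (name, value)) => msg.replace(s"<$name>", value)
  }

println(render(
  "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
  Map(
    "exprName" -> "offset",
    "valueRange" -> "(0, 9223372036854775807]",
    "currentValue" -> "0L")))
// The offset must be between (0, 9223372036854775807] (current value = 0L)
```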

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala

Lines changed: 7 additions & 2 deletions
@@ -275,7 +275,7 @@ class AnalysisErrorSuite extends AnalysisTest {
           SpecifiedWindowFrame(RangeFrame, Literal(1), Literal(2)))).as("window")),
     "Cannot specify window frame for lead function" :: Nil)
 
-  errorTest(
+  errorClassTest(
     "the offset of nth_value window function is negative or zero",
     testRelation2.select(
       WindowExpression(
@@ -284,7 +284,12 @@
           UnresolvedAttribute("a") :: Nil,
           SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil,
           SpecifiedWindowFrame(RowFrame, Literal(0), Literal(0)))).as("window")),
-    "The 'offset' argument of nth_value must be greater than zero but it is 0." :: Nil)
+    errorClass = "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+    messageParameters = Map(
+      "sqlExpr" -> "\"nth_value(b, 0)\"",
+      "exprName" -> "offset",
+      "valueRange" -> "(0, 9223372036854775807]",
+      "currentValue" -> "0L"))
 
   errorClassTest(
     "the offset of nth_value window function is not int literal",

sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out

Lines changed: 10 additions & 8 deletions
@@ -438,11 +438,12 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_2315",
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
   "messageParameters" : {
-    "hint" : "",
-    "msg" : "Buckets expression must be positive, but got: 0",
-    "sqlExpr" : "ntile(0)"
+    "currentValue" : "0",
+    "exprName" : "buckets",
+    "sqlExpr" : "\"ntile(0)\"",
+    "valueRange" : "(0, 2147483647]"
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -461,11 +462,12 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_2315",
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
   "messageParameters" : {
-    "hint" : "",
-    "msg" : "The 'offset' argument of nth_value must be greater than zero but it is 0.",
-    "sqlExpr" : "nth_value(spark_catalog.default.tenk1.four, 0)"
+    "currentValue" : "0L",
+    "exprName" : "offset",
+    "sqlExpr" : "\"nth_value(four, 0)\"",
+    "valueRange" : "(0, 9223372036854775807]"
   },
   "queryContext" : [ {
     "objectType" : "",
