Commit 62c4361

LuciferYang authored and SandishKumarHN committed
[SPARK-40371][SQL] Migrate type check failures of NthValue and NTile onto error classes
### What changes were proposed in this pull request?
This PR replaces `TypeCheckFailure` with `DataTypeMismatch` in the type checks of the window expressions `NthValue` and `NTile`.

### Why are the changes needed?
Migration onto error classes unifies Spark SQL error messages.

### Does this PR introduce _any_ user-facing change?
Yes. The PR changes user-facing error messages.

### How was this patch tested?
Pass GitHub Actions.

Closes apache#38457 from LuciferYang/SPARK-40371.

Authored-by: yangjie01 <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
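Before diving into the diff, it may help to see the shape of the migration in isolation. Below is a minimal, self-contained Scala model: the trait and case-class names mirror Spark's `TypeCheckResult` hierarchy, but everything here is illustrative rather than Spark's actual implementation.

```scala
// Illustrative model only: Spark's real TypeCheckResult lives in
// org.apache.spark.sql.catalyst.analysis; the names below just mirror it.
sealed trait TypeCheckResult
case object TypeCheckSuccess extends TypeCheckResult
// Legacy shape: a single free-form string, hard to unify or translate.
final case class TypeCheckFailure(message: String) extends TypeCheckResult
// New shape: a machine-readable error subclass plus named parameters that
// the error-class framework substitutes into a shared message template.
final case class DataTypeMismatch(
    errorSubClass: String,
    messageParameters: Map[String, String]) extends TypeCheckResult

// The nth_value offset check, restated in this model.
def checkOffset(offsetVal: Long): TypeCheckResult =
  if (offsetVal > 0) {
    TypeCheckSuccess
  } else {
    DataTypeMismatch(
      errorSubClass = "VALUE_OUT_OF_RANGE",
      messageParameters = Map(
        "exprName" -> "offset",
        "valueRange" -> s"(0, ${Long.MaxValue}]",
        "currentValue" -> s"${offsetVal}L"))
  }

println(checkOffset(0L))
// DataTypeMismatch(VALUE_OUT_OF_RANGE,Map(exprName -> offset, ...))
```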
1 parent db714bb commit 62c4361

3 files changed: 61 additions, 20 deletions


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala

Lines changed: 44 additions & 10 deletions
@@ -20,13 +20,13 @@ package org.apache.spark.sql.catalyst.expressions
 import java.util.Locale
 
 import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedException}
-import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckFailure, TypeCheckSuccess}
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess}
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions.Cast.{toSQLExpr, toSQLType}
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateFunction, DeclarativeAggregate, NoOp}
 import org.apache.spark.sql.catalyst.trees.{BinaryLike, LeafLike, TernaryLike, UnaryLike}
 import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UNRESOLVED_WINDOW_EXPRESSION, WINDOW_EXPRESSION}
-import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors}
 import org.apache.spark.sql.types._
 
 /**
@@ -709,7 +709,7 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction with Le
 // scalastyle:on line.size.limit line.contains.tab
 case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean)
   extends AggregateWindowFunction with OffsetWindowFunction with ImplicitCastInputTypes
-  with BinaryLike[Expression] {
+  with BinaryLike[Expression] with QueryErrorsBase {
 
   def this(child: Expression, offset: Expression) = this(child, offset, false)
 
@@ -729,10 +729,23 @@ case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean)
     if (check.isFailure) {
       check
     } else if (!offset.foldable) {
-      TypeCheckFailure(s"Offset expression '$offset' must be a literal.")
+      DataTypeMismatch(
+        errorSubClass = "NON_FOLDABLE_INPUT",
+        messageParameters = Map(
+          "inputName" -> "offset",
+          "inputType" -> toSQLType(offset.dataType),
+          "inputExpr" -> toSQLExpr(offset)
+        )
+      )
     } else if (offsetVal <= 0) {
-      TypeCheckFailure(
-        s"The 'offset' argument of nth_value must be greater than zero but it is $offsetVal.")
+      DataTypeMismatch(
+        errorSubClass = "VALUE_OUT_OF_RANGE",
+        messageParameters = Map(
+          "exprName" -> "offset",
+          "valueRange" -> s"(0, ${Long.MaxValue}]",
+          "currentValue" -> toSQLValue(offsetVal, LongType)
+        )
+      )
     } else {
       TypeCheckSuccess
     }
@@ -815,7 +828,7 @@ case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean)
   group = "window_funcs")
 // scalastyle:on line.size.limit line.contains.tab
 case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindowFunction
-  with UnaryLike[Expression] {
+  with UnaryLike[Expression] with QueryErrorsBase {
 
   def this() = this(Literal(1))
 
@@ -825,18 +838,39 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow
   // for each partition.
   override def checkInputDataTypes(): TypeCheckResult = {
     if (!buckets.foldable) {
-      return TypeCheckFailure(s"Buckets expression must be foldable, but got $buckets")
+      return DataTypeMismatch(
+        errorSubClass = "NON_FOLDABLE_INPUT",
+        messageParameters = Map(
+          "inputName" -> "buckets",
+          "inputType" -> toSQLType(buckets.dataType),
+          "inputExpr" -> toSQLExpr(buckets)
+        )
+      )
     }
 
     if (buckets.dataType != IntegerType) {
-      return TypeCheckFailure(s"Buckets expression must be integer type, but got $buckets")
+      return DataTypeMismatch(
+        errorSubClass = "UNEXPECTED_INPUT_TYPE",
+        messageParameters = Map(
+          "paramIndex" -> "1",
+          "requiredType" -> toSQLType(IntegerType),
+          "inputSql" -> toSQLExpr(buckets),
+          "inputType" -> toSQLType(buckets.dataType))
+      )
     }
 
     val i = buckets.eval().asInstanceOf[Int]
     if (i > 0) {
       TypeCheckSuccess
     } else {
-      TypeCheckFailure(s"Buckets expression must be positive, but got: $i")
+      DataTypeMismatch(
+        errorSubClass = "VALUE_OUT_OF_RANGE",
+        messageParameters = Map(
+          "exprName" -> "buckets",
+          "valueRange" -> s"(0, ${Int.MaxValue}]",
+          "currentValue" -> toSQLValue(i, IntegerType)
+        )
+      )
     }
   }
 
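Note that the checks above only produce structured data; they never build a user-facing string. The DATATYPE_MISMATCH error class owns the message template, and the analyzer attaches the offending expression's SQL (the `sqlExpr` parameter seen in the tests and golden files below) before rendering. A rough, self-contained sketch of that substitution step, assuming a `<param>`-placeholder template in the style of Spark's error-classes JSON (the template wording here is illustrative, not Spark's exact text):

```scala
// Sketch of how an error-class framework can render a DataTypeMismatch.
// The template text is illustrative, not Spark's exact wording; Spark keeps
// its real templates in error-classes.json using <param> placeholders.
val templates = Map(
  "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE" ->
    "The <exprName> must be between <valueRange> (current value = <currentValue>)")

def render(errorClass: String, params: Map[String, String]): String =
  params.foldLeft(templates(errorClass)) {
    case (msg, (name, value)) => msg.replace(s"<$name>", value)
  }

println(render(
  "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
  Map(
    "exprName" -> "offset",
    "valueRange" -> "(0, 9223372036854775807]",
    "currentValue" -> "0L")))
// The offset must be between (0, 9223372036854775807] (current value = 0L)
```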

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala

Lines changed: 7 additions & 2 deletions
@@ -275,7 +275,7 @@ class AnalysisErrorSuite extends AnalysisTest {
           SpecifiedWindowFrame(RangeFrame, Literal(1), Literal(2)))).as("window")),
     "Cannot specify window frame for lead function" :: Nil)
 
-  errorTest(
+  errorClassTest(
     "the offset of nth_value window function is negative or zero",
     testRelation2.select(
       WindowExpression(
@@ -284,7 +284,12 @@
           UnresolvedAttribute("a") :: Nil,
           SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil,
           SpecifiedWindowFrame(RowFrame, Literal(0), Literal(0)))).as("window")),
-    "The 'offset' argument of nth_value must be greater than zero but it is 0." :: Nil)
+    errorClass = "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+    messageParameters = Map(
+      "sqlExpr" -> "\"nth_value(b, 0)\"",
+      "exprName" -> "offset",
+      "valueRange" -> "(0, 9223372036854775807]",
+      "currentValue" -> "0L"))
 
   errorClassTest(
     "the offset of nth_value window function is not int literal",

sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out

Lines changed: 10 additions & 8 deletions
@@ -438,11 +438,12 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_2315",
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
   "messageParameters" : {
-    "hint" : "",
-    "msg" : "Buckets expression must be positive, but got: 0",
-    "sqlExpr" : "ntile(0)"
+    "currentValue" : "0",
+    "exprName" : "buckets",
+    "sqlExpr" : "\"ntile(0)\"",
+    "valueRange" : "(0, 2147483647]"
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -461,11 +462,12 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_2315",
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
   "messageParameters" : {
-    "hint" : "",
-    "msg" : "The 'offset' argument of nth_value must be greater than zero but it is 0.",
-    "sqlExpr" : "nth_value(spark_catalog.default.tenk1.four, 0)"
+    "currentValue" : "0L",
+    "exprName" : "offset",
+    "sqlExpr" : "\"nth_value(four, 0)\"",
+    "valueRange" : "(0, 9223372036854775807]"
   },
   "queryContext" : [ {
     "objectType" : "",
