Skip to content

Commit 93a09ea

Browse files
committed
[SPARK-47072][SQL][3.5] Fix supported interval formats in error messages
### What changes were proposed in this pull request? In the PR, I propose to add one more field to the keys of `supportedFormat` in `IntervalUtils` because the current implementation has duplicate keys that overwrite each other. For instance, the following keys are the same: ``` (YM.YEAR, YM.MONTH) ... (DT.DAY, DT.HOUR) ``` because `YM.YEAR = DT.DAY = 0` and `YM.MONTH = DT.HOUR = 1`. This is a backport of #45127. ### Why are the changes needed? To fix the incorrect error message when Spark cannot parse an ANSI interval string. For example, the expected format should be some year-month format but Spark outputs a day-time one: ```sql spark-sql (default)> select interval '-\t2-2\t' year to month; Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: - 2-2 . (line 1, pos 16) == SQL == select interval '-\t2-2\t' year to month ----------------^^^ ``` ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the existing test suite: ``` $ build/sbt "test:testOnly *IntervalUtilsSuite" ``` and regenerating the golden files: ``` $ SPARK_GENERATE_GOLDEN_FILES=1 PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Authored-by: Max Gekk <max.gekkgmail.com> (cherry picked from commit 074fcf2) Closes #45139 from MaxGekk/fix-supportedFormat-3.5. Authored-by: Max Gekk <[email protected]> Signed-off-by: Max Gekk <[email protected]>
1 parent 5067447 commit 93a09ea

File tree

6 files changed

+27
-22
lines changed

6 files changed

+27
-22
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -107,25 +107,30 @@ object IntervalUtils extends SparkIntervalUtils {
107107
fallBackNotice: Option[String] = None) = {
108108
throw new IllegalArgumentException(
109109
s"Interval string does not match $intervalStr format of " +
110-
s"${supportedFormat((startFiled, endField)).map(format => s"`$format`").mkString(", ")} " +
110+
s"${supportedFormat((intervalStr, startFiled, endField))
111+
.map(format => s"`$format`").mkString(", ")} " +
111112
s"when cast to $typeName: ${input.toString}" +
112113
s"${fallBackNotice.map(s => s", $s").getOrElse("")}")
113114
}
114115

115116
val supportedFormat = Map(
116-
(YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"),
117-
(YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"),
118-
(YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"),
119-
(DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"),
120-
(DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"),
121-
(DT.DAY, DT.MINUTE) -> Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"),
122-
(DT.DAY, DT.SECOND) -> Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"),
123-
(DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"),
124-
(DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"),
125-
(DT.HOUR, DT.SECOND) -> Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"),
126-
(DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"),
127-
(DT.MINUTE, DT.SECOND) -> Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"),
128-
(DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND")
117+
("year-month", YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"),
118+
("year-month", YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"),
119+
("year-month", YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"),
120+
("day-time", DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"),
121+
("day-time", DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"),
122+
("day-time", DT.DAY, DT.MINUTE) ->
123+
Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"),
124+
("day-time", DT.DAY, DT.SECOND) ->
125+
Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"),
126+
("day-time", DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"),
127+
("day-time", DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"),
128+
("day-time", DT.HOUR, DT.SECOND) ->
129+
Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"),
130+
("day-time", DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"),
131+
("day-time", DT.MINUTE, DT.SECOND) ->
132+
Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"),
133+
("day-time", DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND")
129134
)
130135

131136
def castStringToYMInterval(

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,7 +1174,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
11741174
Seq("INTERVAL '1-1' YEAR", "INTERVAL '1-1' MONTH").foreach { interval =>
11751175
val dataType = YearMonthIntervalType()
11761176
val expectedMsg = s"Interval string does not match year-month format of " +
1177-
s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
1177+
s"${IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField))
11781178
.map(format => s"`$format`").mkString(", ")} " +
11791179
s"when cast to ${dataType.typeName}: $interval"
11801180
checkExceptionInExpression[IllegalArgumentException](
@@ -1194,7 +1194,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
11941194
("INTERVAL '1' MONTH", YearMonthIntervalType(YEAR, MONTH)))
11951195
.foreach { case (interval, dataType) =>
11961196
val expectedMsg = s"Interval string does not match year-month format of " +
1197-
s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
1197+
s"${IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField))
11981198
.map(format => s"`$format`").mkString(", ")} " +
11991199
s"when cast to ${dataType.typeName}: $interval"
12001200
checkExceptionInExpression[IllegalArgumentException](
@@ -1314,7 +1314,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
13141314
("1.23", DayTimeIntervalType(MINUTE)))
13151315
.foreach { case (interval, dataType) =>
13161316
val expectedMsg = s"Interval string does not match day-time format of " +
1317-
s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
1317+
s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField))
13181318
.map(format => s"`$format`").mkString(", ")} " +
13191319
s"when cast to ${dataType.typeName}: $interval, " +
13201320
s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " +
@@ -1338,7 +1338,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
13381338
("INTERVAL '92233720368541.775807' SECOND", DayTimeIntervalType(SECOND)))
13391339
.foreach { case (interval, dataType) =>
13401340
val expectedMsg = "Interval string does not match day-time format of " +
1341-
s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
1341+
s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField))
13421342
.map(format => s"`$format`").mkString(", ")} " +
13431343
s"when cast to ${dataType.typeName}: $interval, " +
13441344
s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " +

sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
19161916
{
19171917
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
19181918
"messageParameters" : {
1919-
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
1919+
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
19201920
},
19211921
"queryContext" : [ {
19221922
"objectType" : "",

sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
19161916
{
19171917
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
19181918
"messageParameters" : {
1919-
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
1919+
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
19201920
},
19211921
"queryContext" : [ {
19221922
"objectType" : "",

sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2355,7 +2355,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
23552355
{
23562356
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
23572357
"messageParameters" : {
2358-
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
2358+
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
23592359
},
23602360
"queryContext" : [ {
23612361
"objectType" : "",

sql/core/src/test/resources/sql-tests/results/interval.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2168,7 +2168,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
21682168
{
21692169
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
21702170
"messageParameters" : {
2171-
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
2171+
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
21722172
},
21732173
"queryContext" : [ {
21742174
"objectType" : "",

0 commit comments

Comments
 (0)