Skip to content

Commit 074fcf2

Browse files
committed
[SPARK-47072][SQL] Fix supported interval formats in error messages
### What changes were proposed in this pull request? In the PR, I propose to add one more field to the keys of `supportedFormat` in `IntervalUtils` because the current implementation has duplicate keys that overwrite each other. For instance, the following keys are the same: ``` (YM.YEAR, YM.MONTH) ... (DT.DAY, DT.HOUR) ``` because `YM.YEAR = DT.DAY = 0` and `YM.MONTH = DT.HOUR = 1`. ### Why are the changes needed? To fix the incorrect error message when Spark cannot parse an ANSI interval string. For example, the expected format should be some year-month format but Spark outputs a day-time one: ```sql spark-sql (default)> select interval '-\t2-2\t' year to month; Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: - 2-2 . (line 1, pos 16) == SQL == select interval '-\t2-2\t' year to month ----------------^^^ ``` ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the existing test suite: ``` $ build/sbt "test:testOnly *IntervalUtilsSuite" ``` and regenerating the golden files: ``` $ SPARK_GENERATE_GOLDEN_FILES=1 PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#45127 from MaxGekk/fix-supportedFormat. Authored-by: Max Gekk <[email protected]> Signed-off-by: Max Gekk <[email protected]>
1 parent 64fa13b commit 074fcf2

File tree

6 files changed

+26
-22
lines changed

6 files changed

+26
-22
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -110,27 +110,31 @@ object IntervalUtils extends SparkIntervalUtils {
110110
errorClass = "_LEGACY_ERROR_TEMP_3214",
111111
messageParameters = Map(
112112
"intervalStr" -> intervalStr,
113-
"supportedFormat" -> supportedFormat((startFiled, endField))
113+
"supportedFormat" -> supportedFormat((intervalStr, startFiled, endField))
114114
.map(format => s"`$format`").mkString(", "),
115115
"typeName" -> typeName,
116116
"input" -> input.toString,
117117
"fallBackNotice" -> fallBackNotice.map(s => s", $s").getOrElse("")))
118118
}
119119

120120
val supportedFormat = Map(
121-
(YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"),
122-
(YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"),
123-
(YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"),
124-
(DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"),
125-
(DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"),
126-
(DT.DAY, DT.MINUTE) -> Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"),
127-
(DT.DAY, DT.SECOND) -> Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"),
128-
(DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"),
129-
(DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"),
130-
(DT.HOUR, DT.SECOND) -> Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"),
131-
(DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"),
132-
(DT.MINUTE, DT.SECOND) -> Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"),
133-
(DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND")
121+
("year-month", YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"),
122+
("year-month", YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"),
123+
("year-month", YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"),
124+
("day-time", DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"),
125+
("day-time", DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"),
126+
("day-time", DT.DAY, DT.MINUTE) ->
127+
Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"),
128+
("day-time", DT.DAY, DT.SECOND) ->
129+
Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"),
130+
("day-time", DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"),
131+
("day-time", DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"),
132+
("day-time", DT.HOUR, DT.SECOND) ->
133+
Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"),
134+
("day-time", DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"),
135+
("day-time", DT.MINUTE, DT.SECOND) ->
136+
Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"),
137+
("day-time", DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND")
134138
)
135139

136140
def castStringToYMInterval(

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,7 +1181,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
11811181
"fallBackNotice" -> "",
11821182
"typeName" -> "interval year to month",
11831183
"intervalStr" -> "year-month",
1184-
"supportedFormat" -> "`[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR`",
1184+
"supportedFormat" -> "`[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH`",
11851185
"input" -> interval)
11861186
)
11871187
}
@@ -1204,7 +1204,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
12041204
"typeName" -> dataType.typeName,
12051205
"intervalStr" -> "year-month",
12061206
"supportedFormat" ->
1207-
IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
1207+
IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField))
12081208
.map(format => s"`$format`").mkString(", "),
12091209
"input" -> interval))
12101210
}
@@ -1329,7 +1329,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
13291329
"typeName" -> dataType.typeName,
13301330
"input" -> interval,
13311331
"supportedFormat" ->
1332-
IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
1332+
IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField))
13331333
.map(format => s"`$format`").mkString(", "))
13341334
)
13351335
}
@@ -1355,7 +1355,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
13551355
"typeName" -> dataType.typeName,
13561356
"input" -> interval,
13571357
"supportedFormat" ->
1358-
IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
1358+
IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField))
13591359
.map(format => s"`$format`").mkString(", ")))
13601360
}
13611361
}

sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
19161916
{
19171917
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
19181918
"messageParameters" : {
1919-
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
1919+
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
19201920
},
19211921
"queryContext" : [ {
19221922
"objectType" : "",

sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
19161916
{
19171917
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
19181918
"messageParameters" : {
1919-
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
1919+
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
19201920
},
19211921
"queryContext" : [ {
19221922
"objectType" : "",

sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2355,7 +2355,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
23552355
{
23562356
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
23572357
"messageParameters" : {
2358-
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
2358+
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
23592359
},
23602360
"queryContext" : [ {
23612361
"objectType" : "",

sql/core/src/test/resources/sql-tests/results/interval.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2168,7 +2168,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
21682168
{
21692169
"errorClass" : "_LEGACY_ERROR_TEMP_0063",
21702170
"messageParameters" : {
2171-
"msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t"
2171+
"msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t"
21722172
},
21732173
"queryContext" : [ {
21742174
"objectType" : "",

0 commit comments

Comments
 (0)