Skip to content

Commit 4015f78

Browse files
yifhuamanuzhang authored and turboFei committed
[HADP-52545][HADP-43405] Implicitly cast second argument of date_add/date_sub for Spark 2 compatibility (apache#286)
* [HADP-43405] Implicitly cast second argument of date_add/date_sub for Spark 2 compatibility (apache#60). This PR adds a new mixin `LegacyCastInputTypes` for the analyzer to perform implicit type casting when `spark.sql.legacy.implicitCastInputTypes` is true, for Spark 2 compatibility. apache#26412 broke Spark 2 compatibility by not implicitly casting the second argument of the `date_add`/`date_sub` functions. `DateAdd`/`DateSub` now extend `LegacyCastInputTypes`, with `spark.sql.legacy.implicitCastInputTypes=false` by default, so the default behavior is not changed. We will enable the config in Panda. Does this introduce a user-facing change? No. How was this patch tested? Added a UT. Co-authored-by: tianlzhang <[email protected]> Co-authored-by: Wang, Fei <[email protected]>
1 parent a99df78 commit 4015f78

File tree

9 files changed

+159
-7
lines changed

9 files changed

+159
-7
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -706,12 +706,11 @@ abstract class TypeCoercionBase {
706706
}
707707
}.getOrElse(b) // If there is no applicable conversion, leave expression unchanged.
708708

709+
case e: LegacyCastInputTypes if e.implicitCast && e.inputTypes.nonEmpty =>
710+
handleImplicitCast(e)
711+
709712
case e: ImplicitCastInputTypes if e.inputTypes.nonEmpty =>
710-
val children: Seq[Expression] = e.children.zip(e.inputTypes).map { case (in, expected) =>
711-
// If we cannot do the implicit cast, just use the original input.
712-
implicitCast(in, expected).getOrElse(in)
713-
}
714-
e.withNewChildren(children)
713+
handleImplicitCast(e)
715714

716715
case e: ExpectsInputTypes if e.inputTypes.nonEmpty =>
717716
// Convert NullType into some specific target type for ExpectsInputTypes that don't do
@@ -767,6 +766,14 @@ abstract class TypeCoercionBase {
767766
case (_, other) => other
768767
}
769768
}
769+
private def handleImplicitCast(e: ExpectsInputTypes): Expression = {
770+
val children: Seq[Expression] = e.children.zip(e.inputTypes).map { case (in, expected) =>
771+
// If we cannot do the implicit cast, just use the original input.
772+
implicitCast(in, expected).getOrElse(in)
773+
}
774+
e.withNewChildren(children)
775+
}
776+
770777
}
771778

772779
/**

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
2020
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
2121
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
2222
import org.apache.spark.sql.errors.QueryErrorsBase
23+
import org.apache.spark.sql.internal.SQLConf
2324
import org.apache.spark.sql.types.AbstractDataType
2425

2526
/**
@@ -74,3 +75,13 @@ object ExpectsInputTypes extends QueryErrorsBase {
7475
trait ImplicitCastInputTypes extends ExpectsInputTypes {
7576
// No other methods
7677
}
78+
79+
/**
80+
* A mixin for the analyzer to perform implicit type casting
81+
* based on spark.sql.legacy.implicitCastInputTypes
82+
* for Spark 2 compatibility (HADP-43405)
83+
*/
84+
trait LegacyCastInputTypes extends ExpectsInputTypes {
85+
86+
def implicitCast: Boolean = SQLConf.get.implicitCastInputTypes
87+
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ case class CurrentBatchTimestamp(
320320
group = "datetime_funcs",
321321
since = "1.5.0")
322322
case class DateAdd(startDate: Expression, days: Expression)
323-
extends BinaryExpression with ExpectsInputTypes with NullIntolerant {
323+
extends BinaryExpression with LegacyCastInputTypes with NullIntolerant {
324324

325325
override def left: Expression = startDate
326326
override def right: Expression = days
@@ -359,7 +359,7 @@ case class DateAdd(startDate: Expression, days: Expression)
359359
group = "datetime_funcs",
360360
since = "1.5.0")
361361
case class DateSub(startDate: Expression, days: Expression)
362-
extends BinaryExpression with ExpectsInputTypes with NullIntolerant {
362+
extends BinaryExpression with LegacyCastInputTypes with NullIntolerant {
363363
override def left: Expression = startDate
364364
override def right: Expression = days
365365

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4242,6 +4242,14 @@ object SQLConf {
42424242
.booleanConf
42434243
.createWithDefault(false)
42444244

4245+
val LEGACY_IMPLICIT_CAST_INPUT_TYPES =
4246+
buildConf("spark.sql.legacy.implicitCastInputTypes")
4247+
.internal()
4248+
.doc("If it is set to true, type coercion of LegacyCastInputTypes will be the same as " +
4249+
"ImplicitCastInputTypes; otherwise, it's same as ExpectsInputTypes")
4250+
.booleanConf
4251+
.createWithDefault(false)
4252+
42454253
val DEFAULT_CATALOG = buildConf("spark.sql.defaultCatalog")
42464254
.doc("Name of the default catalog. This will be the current catalog if users have not " +
42474255
"explicitly set the current catalog yet.")
@@ -6174,6 +6182,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
61746182

61756183
def quotaFreeBlockListDir: Option[String] = getConf(QUOTA_FREE_BLOCK_LIST_DIR)
61766184

6185+
def implicitCastInputTypes: Boolean = getConf(SQLConf.LEGACY_IMPLICIT_CAST_INPUT_TYPES)
6186+
61776187
def csvFilterPushDown: Boolean = getConf(CSV_FILTER_PUSHDOWN_ENABLED)
61786188

61796189
def jsonFilterPushDown: Boolean = getConf(JSON_FILTER_PUSHDOWN_ENABLED)

sql/core/src/test/resources/sql-tests/analyzer-results/ansi/date.sql.out

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,3 +953,29 @@ org.apache.spark.sql.catalyst.parser.ParseException
953953
"fragment" : "datediff('YEAR', date'2022-02-25', date'2023-02-25')"
954954
} ]
955955
}
956+
957+
958+
-- !query
959+
set spark.sql.legacy.implicitCastInputTypes=true
960+
-- !query analysis
961+
SetCommand (spark.sql.legacy.implicitCastInputTypes,Some(true))
962+
963+
964+
-- !query
965+
select date_add('2011-11-11', 1L)
966+
-- !query analysis
967+
Project [date_add(cast(2011-11-11 as date), cast(1 as int)) AS date_add(2011-11-11, 1)#x]
968+
+- OneRowRelation
969+
970+
971+
-- !query
972+
select date_sub('2011-11-11', 1L)
973+
-- !query analysis
974+
Project [date_sub(cast(2011-11-11 as date), cast(1 as int)) AS date_sub(2011-11-11, 1)#x]
975+
+- OneRowRelation
976+
977+
978+
-- !query
979+
set spark.sql.legacy.implicitCastInputTypes=false
980+
-- !query analysis
981+
SetCommand (spark.sql.legacy.implicitCastInputTypes,Some(false))

sql/core/src/test/resources/sql-tests/analyzer-results/date.sql.out

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,3 +1028,29 @@ org.apache.spark.sql.catalyst.parser.ParseException
10281028
"fragment" : "datediff('YEAR', date'2022-02-25', date'2023-02-25')"
10291029
} ]
10301030
}
1031+
1032+
1033+
-- !query
1034+
set spark.sql.legacy.implicitCastInputTypes=true
1035+
-- !query analysis
1036+
SetCommand (spark.sql.legacy.implicitCastInputTypes,Some(true))
1037+
1038+
1039+
-- !query
1040+
select date_add('2011-11-11', 1L)
1041+
-- !query analysis
1042+
Project [date_add(cast(2011-11-11 as date), cast(1 as int)) AS date_add(2011-11-11, 1)#x]
1043+
+- OneRowRelation
1044+
1045+
1046+
-- !query
1047+
select date_sub('2011-11-11', 1L)
1048+
-- !query analysis
1049+
Project [date_sub(cast(2011-11-11 as date), cast(1 as int)) AS date_sub(2011-11-11, 1)#x]
1050+
+- OneRowRelation
1051+
1052+
1053+
-- !query
1054+
set spark.sql.legacy.implicitCastInputTypes=false
1055+
-- !query analysis
1056+
SetCommand (spark.sql.legacy.implicitCastInputTypes,Some(false))

sql/core/src/test/resources/sql-tests/inputs/date.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,11 @@ select date_diff(YEAR, date'2022-02-25', date'2023-02-25');
171171

172172
select date_diff('MILLISECOND', timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455');
173173
select datediff('YEAR', date'2022-02-25', date'2023-02-25');
174+
175+
-- [HADP-43405] Implicitly cast second argument of date_add/date_sub for Spark 2 compatibility
176+
set spark.sql.legacy.implicitCastInputTypes=true;
177+
178+
select date_add('2011-11-11', 1L);
179+
select date_sub('2011-11-11', 1L);
180+
181+
set spark.sql.legacy.implicitCastInputTypes=false;

sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,3 +1215,35 @@ org.apache.spark.sql.catalyst.parser.ParseException
12151215
"fragment" : "datediff('YEAR', date'2022-02-25', date'2023-02-25')"
12161216
} ]
12171217
}
1218+
1219+
1220+
-- !query
1221+
set spark.sql.legacy.implicitCastInputTypes=true
1222+
-- !query schema
1223+
struct<key:string,value:string>
1224+
-- !query output
1225+
spark.sql.legacy.implicitCastInputTypes true
1226+
1227+
1228+
-- !query
1229+
select date_add('2011-11-11', 1L)
1230+
-- !query schema
1231+
struct<date_add(2011-11-11, 1):date>
1232+
-- !query output
1233+
2011-11-12
1234+
1235+
1236+
-- !query
1237+
select date_sub('2011-11-11', 1L)
1238+
-- !query schema
1239+
struct<date_sub(2011-11-11, 1):date>
1240+
-- !query output
1241+
2011-11-10
1242+
1243+
1244+
-- !query
1245+
set spark.sql.legacy.implicitCastInputTypes=false
1246+
-- !query schema
1247+
struct<key:string,value:string>
1248+
-- !query output
1249+
spark.sql.legacy.implicitCastInputTypes false

sql/core/src/test/resources/sql-tests/results/date.sql.out

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,3 +1221,35 @@ org.apache.spark.sql.catalyst.parser.ParseException
12211221
"fragment" : "datediff('YEAR', date'2022-02-25', date'2023-02-25')"
12221222
} ]
12231223
}
1224+
1225+
1226+
-- !query
1227+
set spark.sql.legacy.implicitCastInputTypes=true
1228+
-- !query schema
1229+
struct<key:string,value:string>
1230+
-- !query output
1231+
spark.sql.legacy.implicitCastInputTypes true
1232+
1233+
1234+
-- !query
1235+
select date_add('2011-11-11', 1L)
1236+
-- !query schema
1237+
struct<date_add(2011-11-11, 1):date>
1238+
-- !query output
1239+
2011-11-12
1240+
1241+
1242+
-- !query
1243+
select date_sub('2011-11-11', 1L)
1244+
-- !query schema
1245+
struct<date_sub(2011-11-11, 1):date>
1246+
-- !query output
1247+
2011-11-10
1248+
1249+
1250+
-- !query
1251+
set spark.sql.legacy.implicitCastInputTypes=false
1252+
-- !query schema
1253+
struct<key:string,value:string>
1254+
-- !query output
1255+
spark.sql.legacy.implicitCastInputTypes false

0 commit comments

Comments
 (0)