-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-33354][SQL] New explicit cast syntax rules in ANSI mode #30260
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
18b49bf
f74c488
7bfb1a6
e6faf4b
33452cd
ce0e775
1d57a24
6003bef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -25,6 +25,7 @@ import java.util.concurrent.TimeUnit._ | |||||||||||
| import org.apache.spark.SparkException | ||||||||||||
| import org.apache.spark.sql.catalyst.InternalRow | ||||||||||||
| import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} | ||||||||||||
| import org.apache.spark.sql.catalyst.expressions.Cast.{canCast, forceNullable, resolvableNullability} | ||||||||||||
| import org.apache.spark.sql.catalyst.expressions.codegen._ | ||||||||||||
| import org.apache.spark.sql.catalyst.expressions.codegen.Block._ | ||||||||||||
| import org.apache.spark.sql.catalyst.util._ | ||||||||||||
|
|
@@ -258,13 +259,18 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | |||||||||||
|
|
||||||||||||
| def dataType: DataType | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * Returns true iff we can cast `from` type to `to` type. | ||||||||||||
| */ | ||||||||||||
| def canCast(from: DataType, to: DataType): Boolean | ||||||||||||
|
|
||||||||||||
| override def toString: String = { | ||||||||||||
| val ansi = if (ansiEnabled) "ansi_" else "" | ||||||||||||
| s"${ansi}cast($child as ${dataType.simpleString})" | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| override def checkInputDataTypes(): TypeCheckResult = { | ||||||||||||
| if (Cast.canCast(child.dataType, dataType)) { | ||||||||||||
| if (canCast(child.dataType, dataType)) { | ||||||||||||
| TypeCheckResult.TypeCheckSuccess | ||||||||||||
| } else { | ||||||||||||
| TypeCheckResult.TypeCheckFailure( | ||||||||||||
|
|
@@ -1753,6 +1759,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String | |||||||||||
| copy(timeZoneId = Option(timeZoneId)) | ||||||||||||
|
|
||||||||||||
| override protected val ansiEnabled: Boolean = SQLConf.get.ansiEnabled | ||||||||||||
|
|
||||||||||||
| override def canCast(from: DataType, to: DataType): Boolean = if (ansiEnabled) { | ||||||||||||
| AnsiCast.canCast(from, to) | ||||||||||||
| } else { | ||||||||||||
| Cast.canCast(from, to) | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
|
|
@@ -1770,6 +1782,64 @@ case class AnsiCast(child: Expression, dataType: DataType, timeZoneId: Option[St | |||||||||||
| copy(timeZoneId = Option(timeZoneId)) | ||||||||||||
|
|
||||||||||||
| override protected val ansiEnabled: Boolean = true | ||||||||||||
|
|
||||||||||||
| override def canCast(from: DataType, to: DataType): Boolean = AnsiCast.canCast(from, to) | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| object AnsiCast { | ||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you leave some comments to summarize the current behaviour of the ANSI explicit cast as described in the PR description (references |
||||||||||||
| def canCast(from: DataType, to: DataType): Boolean = (from, to) match { | ||||||||||||
| case (fromType, toType) if fromType == toType => true | ||||||||||||
|
|
||||||||||||
| case (NullType, _) => true | ||||||||||||
|
|
||||||||||||
| case (_: NumericType, _: NumericType) => true | ||||||||||||
| case (StringType, _: NumericType) => true | ||||||||||||
| case (BooleanType, _: NumericType) => true | ||||||||||||
|
Comment on lines
+1854
to
+1856
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (Just a suggestion) For readability, could you reorder these entries according to spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala Lines 70 to 74 in 35ac314
|
||||||||||||
|
|
||||||||||||
| case (_: NumericType, StringType) => true | ||||||||||||
| case (_: DateType, StringType) => true | ||||||||||||
| case (_: TimestampType, StringType) => true | ||||||||||||
| case (_: CalendarIntervalType, StringType) => true | ||||||||||||
| case (BooleanType, StringType) => true | ||||||||||||
| case (BinaryType, StringType) => true | ||||||||||||
|
|
||||||||||||
| case (StringType, DateType) => true | ||||||||||||
| case (TimestampType, DateType) => true | ||||||||||||
|
|
||||||||||||
| case (StringType, TimestampType) => true | ||||||||||||
| case (DateType, TimestampType) => true | ||||||||||||
|
|
||||||||||||
| case (StringType, _: CalendarIntervalType) => true | ||||||||||||
|
|
||||||||||||
| case (StringType, BooleanType) => true | ||||||||||||
| case (_: NumericType, BooleanType) => true | ||||||||||||
|
|
||||||||||||
| case (StringType, _: BinaryType) => true | ||||||||||||
|
|
||||||||||||
| case (ArrayType(fromType, fn), ArrayType(toType, tn)) => | ||||||||||||
| canCast(fromType, toType) && | ||||||||||||
| resolvableNullability(fn || forceNullable(fromType, toType), tn) | ||||||||||||
|
|
||||||||||||
| case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => | ||||||||||||
| canCast(fromKey, toKey) && | ||||||||||||
| (!forceNullable(fromKey, toKey)) && | ||||||||||||
| canCast(fromValue, toValue) && | ||||||||||||
| resolvableNullability(fn || forceNullable(fromValue, toValue), tn) | ||||||||||||
|
|
||||||||||||
| case (StructType(fromFields), StructType(toFields)) => | ||||||||||||
| fromFields.length == toFields.length && | ||||||||||||
| fromFields.zip(toFields).forall { | ||||||||||||
| case (fromField, toField) => | ||||||||||||
| canCast(fromField.dataType, toField.dataType) && | ||||||||||||
| resolvableNullability( | ||||||||||||
| fromField.nullable || forceNullable(fromField.dataType, toField.dataType), | ||||||||||||
| toField.nullable) | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| case (udt1: UserDefinedType[_], udt2: UserDefinedType[_]) if udt2.acceptsType(udt1) => true | ||||||||||||
|
|
||||||||||||
| case _ => false | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
|
|
||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about describing this new behaviour in the usage above of
ExpressionDescription?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, then we need to mention the behavior of throwing overflow exceptions when the ANSI flag is enabled. I will add some content in
sql-ref-ansi-compliance.md