-
Notifications
You must be signed in to change notification settings - Fork 29.3k
[SPARK-31750][SQL] Eliminate UpCast if child's dataType is DecimalType #28572
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
a6db5d6
2a9c35a
fae0e54
2dc526f
b137ec4
b4eb291
8fe0490
6b70e77
bc0bbec
75f4f65
c0345d6
e7664a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3071,15 +3071,27 @@ class Analyzer( | |
| case p => p transformExpressions { | ||
| case u @ UpCast(child, _, _) if !child.resolved => u | ||
|
|
||
| case UpCast(child, dt: AtomicType, _) | ||
| case u @ UpCast(child, _, _) | ||
| if SQLConf.get.getConf(SQLConf.LEGACY_LOOSE_UPCAST) && | ||
| u.dataType.isInstanceOf[AtomicType] && | ||
| child.dataType == StringType => | ||
| Cast(child, dt.asNullable) | ||
|
|
||
| case UpCast(child, dataType, walkedTypePath) if !Cast.canUpCast(child.dataType, dataType) => | ||
| fail(child, dataType, walkedTypePath) | ||
|
|
||
| case UpCast(child, dataType, _) => Cast(child, dataType.asNullable) | ||
| Cast(child, u.dataType.asNullable) | ||
|
|
||
| case UpCast(child, target, walkedTypePath) | ||
|
Ngone51 marked this conversation as resolved.
Outdated
|
||
| if child.dataType.isInstanceOf[DecimalType] | ||
| && target == DecimalType | ||
| && walkedTypePath.nonEmpty => | ||
|
HyukjinKwon marked this conversation as resolved.
Outdated
|
||
| // SPARK-31750: for the case where data type is explicitly known, e.g., spark.read | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: |
||
| // .parquet("/tmp/file").as[BigDecimal], we will have UpCast(child, Decimal(38, 18)), | ||
| // where the child's data type can be, e.g., Decimal(38, 0). In this case, we should | ||
| // not cast, because the cast would cause precision loss. Thus, we eliminate the | ||
| // UpCast here to avoid precision loss. | ||
| child | ||
|
|
||
| case u @ UpCast(child, _, walkedTypePath) if !Cast.canUpCast(child.dataType, u.dataType) => | ||
| fail(child, u.dataType, walkedTypePath) | ||
|
|
||
| case u @ UpCast(child, _, _) => Cast(child, u.dataType.asNullable) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,9 +22,9 @@ import scala.reflect.runtime.universe.TypeTag | |
| import org.apache.spark.sql.AnalysisException | ||
| import org.apache.spark.sql.catalyst.InternalRow | ||
| import org.apache.spark.sql.catalyst.dsl.expressions._ | ||
| import org.apache.spark.sql.catalyst.expressions.Attribute | ||
| import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} | ||
| import org.apache.spark.sql.catalyst.plans.PlanTest | ||
| import org.apache.spark.sql.catalyst.util.GenericArrayData | ||
| import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.unsafe.types.UTF8String | ||
|
|
||
|
|
@@ -247,6 +247,13 @@ class EncoderResolutionSuite extends PlanTest { | |
| """.stripMargin.trim + " of the field in the target object") | ||
| } | ||
|
|
||
| test("SPARK-31750: eliminate UpCast if child's dataType is DecimalType") { | ||
| val encoder = ExpressionEncoder[Seq[BigDecimal]] | ||
| val attr = Seq(AttributeReference("a", ArrayType(DecimalType(38, 0)))()) | ||
| // Previously, this would fail because Decimal(38, 0) cannot be cast to Decimal(38, 18). | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| testFromRow(encoder, attr, InternalRow(ArrayData.toArrayData(Array(Decimal(1.0))))) | ||
| } | ||
|
|
||
| // test for leaf types | ||
| castSuccess[Int, Long] | ||
| castSuccess[java.sql.Date, java.sql.Timestamp] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2439,6 +2439,17 @@ class DataFrameSuite extends QueryTest | |
| val nestedDecArray = Array(decSpark) | ||
| checkAnswer(Seq(nestedDecArray).toDF(), Row(Array(wrapRefArray(decJava)))) | ||
| } | ||
|
|
||
| test("SPARK-31750: eliminate UpCast if child's dataType is DecimalType") { | ||
| withTempPath { f => | ||
| sql("select cast(11111111111111111111111111111111111111 as decimal(38, 0)) as d") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this test can still reproduce the bug even if we use
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. It depends on the precision/scale rather than the value itself.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can make it shorter.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this test can still reproduce the bug even if we use
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I've changed it to |
||
| .write.mode("overwrite") | ||
| .parquet(f.getAbsolutePath) | ||
|
|
||
| val df = spark.read.parquet(f.getAbsolutePath).as[BigDecimal] | ||
| assert(df.schema === new StructType().add(StructField("d", DecimalType(38, 0)))) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| case class GroupByKey(a: Int, b: Int) | ||
Uh oh!
There was an error while loading. Please reload this page.