From 1cd459f97020dad5b10842c24682aeb2eaac7b38 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Mon, 17 Sep 2018 22:28:54 -0700 Subject: [PATCH 1/4] Improve findTightestCommonType to coerce Integral and decimal types --- .../sql/catalyst/analysis/TypeCoercion.scala | 27 ++++++++++++++++--- .../catalyst/analysis/TypeCoercionSuite.scala | 19 ++++++++++++- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 288b6358fbff..727161fd1bb3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -21,6 +21,7 @@ import javax.annotation.Nullable import scala.annotation.tailrec import scala.collection.mutable +import scala.math._ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions._ @@ -31,6 +32,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ + /** * A collection of [[Rule]] that can be used to coerce differing types that participate in * operations into compatible ones. @@ -89,10 +91,11 @@ object TypeCoercion { case (NullType, t1) => Some(t1) case (t1, NullType) => Some(t1) - case (t1: IntegralType, t2: DecimalType) if t2.isWiderThan(t1) => - Some(t2) - case (t1: DecimalType, t2: IntegralType) if t1.isWiderThan(t2) => - Some(t1) + case (t1: IntegralType, t2: DecimalType) => + findWiderDecimalType(DecimalType.forType(t1), t2) + + case (t1: DecimalType, t2: IntegralType) => + findWiderDecimalType(t1, DecimalType.forType(t2)) // Promote numeric types to the highest of the two case (t1: NumericType, t2: NumericType) @@ -106,6 +109,22 @@ object TypeCoercion { case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType) } + /** + * Finds a wider decimal type between the two supplied decimal types without + * any loss of precision. + */ + def findWiderDecimalType(d1: DecimalType, d2: DecimalType): Option[DecimalType] = { + val scale = max(d1.scale, d2.scale) + val range = max(d1.precision - d1.scale, d2.precision - d2.scale) + + // Check the resultant decimal type does not exceed the allowable limits. + if (range + scale <= DecimalType.MAX_PRECISION && scale <= DecimalType.MAX_SCALE ) { + Some(DecimalType(range + scale, scale)) + } else { + None + } + } + /** Promotes all the way to StringType. */ private def stringPromotion(dt1: DataType, dt2: DataType): Option[DataType] = (dt1, dt2) match { case (StringType, t2: AtomicType) if t2 != BinaryType && t2 != BooleanType => Some(StringType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 461eda4334bb..b0fa064059be 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -366,7 +366,24 @@ class TypeCoercionSuite extends AnalysisTest { // No up-casting for fixed-precision decimal (this is handled by arithmetic rules) widenTest(DecimalType(2, 1), DecimalType(3, 2), None) widenTest(DecimalType(2, 1), DoubleType, None) - widenTest(DecimalType(2, 1), IntegerType, None) + widenTest(DecimalType(2, 1), IntegerType, Some(DecimalType(11, 1))) + widenTest(DecimalType(2, 1), IntegerType, Some(DecimalType(11, 1))) + widenTest(DecimalType(2, 2), IntegerType, Some(DecimalType(12, 2))) + widenTest(DecimalType(4, 2), IntegerType, Some(DecimalType(12, 2))) + widenTest(DecimalType(10, 2), IntegerType, Some(DecimalType(12, 2))) + widenTest(DecimalType(38, 18), IntegerType, Some(DecimalType(38, 18))) + widenTest(DecimalType(38, 28), IntegerType, Some(DecimalType(38, 28))) + widenTest(DecimalType(38, 29), IntegerType, None) + widenTest(DecimalType(38, 38), IntegerType, None) + widenTest(DecimalType(2, 1), LongType, Some(DecimalType(21, 1))) + widenTest(DecimalType(2, 1), LongType, Some(DecimalType(21, 1))) + widenTest(DecimalType(2, 2), LongType, Some(DecimalType(22, 2))) + widenTest(DecimalType(4, 2), LongType, Some(DecimalType(22, 2))) + widenTest(DecimalType(10, 2), LongType, Some(DecimalType(22, 2))) + widenTest(DecimalType(38, 18), LongType, Some(DecimalType(38, 18))) + widenTest(DecimalType(38, 18), LongType, Some(DecimalType(38, 18))) + widenTest(DecimalType(38, 19), LongType, None) + widenTest(DecimalType(38, 38), LongType, None) widenTest(DoubleType, DecimalType(2, 1), None) // StringType From 6c29f05d40a99366f41eadede84dababc4fc9d7b Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Mon, 17 Sep 2018 22:52:14 -0700 Subject: [PATCH 2/4] remove space --- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 727161fd1bb3..aa95c178cdf3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -93,7 +93,6 @@ object TypeCoercion { case (t1: IntegralType, t2: DecimalType) => findWiderDecimalType(DecimalType.forType(t1), t2) - case (t1: DecimalType, t2: IntegralType) => findWiderDecimalType(t1, DecimalType.forType(t2)) From 0de332801c4b50f175b4c2eb0205096bcd4093b3 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Tue, 18 Sep 2018 12:09:22 -0700 Subject: [PATCH 3/4] Add more tests with negative scale --- .../apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 3 +-- .../spark/sql/catalyst/analysis/TypeCoercionSuite.scala | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index aa95c178cdf3..8673d2411e81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -32,7 +32,6 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ - /** * A collection of [[Rule]] that can be used to coerce differing types that participate in * operations into compatible ones. @@ -117,7 +116,7 @@ object TypeCoercion { val range = max(d1.precision - d1.scale, d2.precision - d2.scale) // Check the resultant decimal type does not exceed the allowable limits. - if (range + scale <= DecimalType.MAX_PRECISION && scale <= DecimalType.MAX_SCALE ) { + if (range + scale <= DecimalType.MAX_PRECISION && scale <= DecimalType.MAX_SCALE) { Some(DecimalType(range + scale, scale)) } else { None diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index b0fa064059be..5b4aaf2659a2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -375,6 +375,11 @@ class TypeCoercionSuite extends AnalysisTest { widenTest(DecimalType(38, 28), IntegerType, Some(DecimalType(38, 28))) widenTest(DecimalType(38, 29), IntegerType, None) widenTest(DecimalType(38, 38), IntegerType, None) + widenTest(DecimalType(3, -2), IntegerType, Some(DecimalType(10, 0))) + widenTest(DecimalType(11, -2), IntegerType, Some(DecimalType(13, 0))) + widenTest(DecimalType(36, -2), IntegerType, Some(DecimalType(38, 0))) + widenTest(DecimalType(37, -2), IntegerType, None) + widenTest(DecimalType(1, -38), IntegerType, None) widenTest(DecimalType(2, 1), LongType, Some(DecimalType(21, 1))) widenTest(DecimalType(2, 1), LongType, Some(DecimalType(21, 1))) widenTest(DecimalType(2, 2), LongType, Some(DecimalType(22, 2))) From 2304c7d759ee950785b81219b48d8a441741bd83 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Tue, 18 Sep 2018 21:37:44 -0700 Subject: [PATCH 4/4] review --- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 8673d2411e81..819d8487d901 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -116,7 +116,7 @@ object TypeCoercion { val range = max(d1.precision - d1.scale, d2.precision - d2.scale) // Check the resultant decimal type does not exceed the allowable limits. - if (range + scale <= DecimalType.MAX_PRECISION && scale <= DecimalType.MAX_SCALE) { + if (range + scale <= DecimalType.MAX_PRECISION) { Some(DecimalType(range + scale, scale)) } else { None