From 9459e6e78142f28c209eba3ae3134564004970ec Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 8 Aug 2018 23:35:40 +0800 Subject: [PATCH 01/11] Unify the InConversion and BinaryComparison behaviour when InConversion's list only contains one datatype --- .../sql/catalyst/analysis/TypeCoercion.scala | 14 ++++- .../typeCoercion/native/inConversion.sql.out | 56 +++++++++---------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 27839d72c6306..603bfddab0ebe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -486,9 +486,17 @@ object TypeCoercion { } case i @ In(a, b) if b.exists(_.dataType != a.dataType) => - findWiderCommonType(i.children.map(_.dataType)) match { - case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) - case None => i + if (b.map(_.dataType).distinct.size == 1) { + findCommonTypeForBinaryComparison(a.dataType, b.head.dataType, conf) + .orElse(findWiderTypeForTwo(a.dataType, b.head.dataType)) match { + case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) + case None => i + } + } else { + findWiderCommonType(i.children.map(_.dataType)) match { + case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) + case None => i + } } } } diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out index 875ccc1341ec4..9831c57101a32 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out @@ -69,7 +69,7 @@ true -- !query 8 SELECT cast(1 as tinyint) in (cast(1 as string)) FROM t -- !query 8 schema -struct<(CAST(CAST(1 AS TINYINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS TINYINT) AS TINYINT) IN (CAST(CAST(1 AS STRING) AS TINYINT))):boolean> -- !query 8 output true @@ -169,7 +169,7 @@ true -- !query 20 SELECT cast(1 as smallint) in (cast(1 as string)) FROM t -- !query 20 schema -struct<(CAST(CAST(1 AS SMALLINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS SMALLINT) AS SMALLINT) IN (CAST(CAST(1 AS STRING) AS SMALLINT))):boolean> -- !query 20 output true @@ -269,7 +269,7 @@ true -- !query 32 SELECT cast(1 as int) in (cast(1 as string)) FROM t -- !query 32 schema -struct<(CAST(CAST(1 AS INT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS STRING) AS INT))):boolean> -- !query 32 output true @@ -369,7 +369,7 @@ true -- !query 44 SELECT cast(1 as bigint) in (cast(1 as string)) FROM t -- !query 44 schema -struct<(CAST(CAST(1 AS BIGINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS STRING) AS BIGINT))):boolean> -- !query 44 output true @@ -469,9 +469,9 @@ true -- !query 56 SELECT cast(1 as float) in (cast(1 as string)) FROM t -- !query 56 schema -struct<(CAST(CAST(1 AS FLOAT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS STRING) AS FLOAT))):boolean> -- !query 56 output -false +true -- !query 57 @@ -569,9 +569,9 @@ true -- !query 68 SELECT cast(1 as double) in (cast(1 as string)) FROM t -- !query 68 schema -struct<(CAST(CAST(1 AS DOUBLE) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> -- !query 68 output -false +true -- !query 69 @@ -669,7 +669,7 @@ true -- !query 80 SELECT cast(1 as decimal(10, 0)) in (cast(1 as string)) FROM t -- !query 80 schema -struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> -- !query 80 output true @@ -713,7 +713,7 @@ cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST('2017-12-11 09:30:00' AS DATE -- !query 85 SELECT cast(1 as string) in (cast(1 as tinyint)) FROM t -- !query 85 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS TINYINT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS TINYINT) IN (CAST(CAST(1 AS TINYINT) AS TINYINT))):boolean> -- !query 85 output true @@ -721,7 +721,7 @@ true -- !query 86 SELECT cast(1 as string) in (cast(1 as smallint)) FROM t -- !query 86 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS SMALLINT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS SMALLINT) IN (CAST(CAST(1 AS SMALLINT) AS SMALLINT))):boolean> -- !query 86 output true @@ -729,7 +729,7 @@ true -- !query 87 SELECT cast(1 as string) in (cast(1 as int)) FROM t -- !query 87 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS INT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS INT) IN (CAST(CAST(1 AS INT) AS INT))):boolean> -- !query 87 output true @@ -737,7 +737,7 @@ true -- !query 88 SELECT cast(1 as string) in (cast(1 as bigint)) FROM t -- !query 88 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS BIGINT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> -- !query 88 output true @@ -745,23 +745,23 @@ true -- !query 89 SELECT cast(1 as string) in (cast(1 as float)) FROM t -- !query 89 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS FLOAT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> -- !query 89 output -false +true -- !query 90 SELECT cast(1 as string) in (cast(1 as double)) FROM t -- !query 90 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS DOUBLE) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> -- !query 90 output -false +true -- !query 91 SELECT cast(1 as string) in (cast(1 as decimal(10, 0))) FROM t -- !query 91 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> -- !query 91 output true @@ -777,19 +777,17 @@ true -- !query 93 SELECT cast(1 as string) in (cast('1' as binary)) FROM t -- !query 93 schema -struct<> +struct<(CAST(CAST(1 AS STRING) AS BINARY) IN (CAST(CAST(1 AS BINARY) AS BINARY))):boolean> -- !query 93 output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS STRING) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: string != binary; line 1 pos 25 +true -- !query 94 SELECT cast(1 as string) in (cast(1 as boolean)) FROM t -- !query 94 schema -struct<> +struct<(CAST(CAST(1 AS STRING) AS BOOLEAN) IN (CAST(CAST(1 AS BOOLEAN) AS BOOLEAN))):boolean> -- !query 94 output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: string != boolean; line 1 pos 25 +true -- !query 95 @@ -874,10 +872,9 @@ cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS DECIMAL(10,0))))' due to data -- !query 104 SELECT cast('1' as binary) in (cast(1 as string)) FROM t -- !query 104 schema -struct<> +struct<(CAST(CAST(1 AS BINARY) AS BINARY) IN (CAST(CAST(1 AS STRING) AS BINARY))):boolean> -- !query 104 output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: binary != string; line 1 pos 27 +true -- !query 105 @@ -981,10 +978,9 @@ cannot resolve '(true IN (CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: -- !query 116 SELECT true in (cast(1 as string)) FROM t -- !query 116 schema -struct<> +struct<(CAST(true AS BOOLEAN) IN (CAST(CAST(1 AS STRING) AS BOOLEAN))):boolean> -- !query 116 output -org.apache.spark.sql.AnalysisException -cannot resolve '(true IN (CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: boolean != string; line 1 pos 12 +true -- !query 117 From c4775c4f3255a680c5cffe2a7cd631f22a93832a Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 9 Aug 2018 14:05:15 +0800 Subject: [PATCH 02/11] findWiderTypeForTwo -> findWiderTypeWithoutStringPromotionForTwo --- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 603bfddab0ebe..ee986197a0224 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -488,7 +488,7 @@ object TypeCoercion { case i @ In(a, b) if b.exists(_.dataType != a.dataType) => if (b.map(_.dataType).distinct.size == 1) { findCommonTypeForBinaryComparison(a.dataType, b.head.dataType, conf) - .orElse(findWiderTypeForTwo(a.dataType, b.head.dataType)) match { + .orElse(findWiderTypeWithoutStringPromotionForTwo(a.dataType, b.head.dataType)) match { case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) case None => i } From 935ed36efb908fae8bec3521c7c018350daad665 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 9 Aug 2018 23:21:51 +0800 Subject: [PATCH 03/11] Add findInCommonType --- .../sql/catalyst/analysis/TypeCoercion.scala | 24 +++++----- .../typeCoercion/native/inConversion.sql.out | 48 +++++++++---------- 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index ee986197a0224..96ac80c48720c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -231,6 +231,16 @@ object TypeCoercion { }) } + private def findInCommonType(types: Seq[DataType], conf: SQLConf): Option[DataType] = { + val (stringTypes, nonStringTypes) = types.partition(hasStringType(_)) + (stringTypes.distinct ++ nonStringTypes).foldLeft[Option[DataType]](Some(NullType))((r, c) => + r match { + case Some(d) => findCommonTypeForBinaryComparison(d, c, conf) + .orElse(findWiderTypeWithoutStringPromotionForTwo(d, c)) + case _ => None + }) + } + /** * Similar to [[findWiderTypeForTwo]] that can handle decimal types, but can't promote to * string. If the wider decimal type exceeds system limitation, this rule will truncate @@ -486,17 +496,9 @@ object TypeCoercion { } case i @ In(a, b) if b.exists(_.dataType != a.dataType) => - if (b.map(_.dataType).distinct.size == 1) { - findCommonTypeForBinaryComparison(a.dataType, b.head.dataType, conf) - .orElse(findWiderTypeWithoutStringPromotionForTwo(a.dataType, b.head.dataType)) match { - case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) - case None => i - } - } else { - findWiderCommonType(i.children.map(_.dataType)) match { - case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) - case None => i - } + findInCommonType(i.children.map(_.dataType), conf) match { + case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) + case None => i } } } diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out index 9831c57101a32..9c7c5ba355c65 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out @@ -1287,7 +1287,7 @@ true -- !query 152 SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as string)) FROM t -- !query 152 schema -struct<(CAST(CAST(1 AS TINYINT) AS STRING) IN (CAST(CAST(1 AS TINYINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS TINYINT) AS TINYINT) IN (CAST(CAST(1 AS TINYINT) AS TINYINT), CAST(CAST(1 AS STRING) AS TINYINT))):boolean> -- !query 152 output true @@ -1387,7 +1387,7 @@ true -- !query 164 SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as string)) FROM t -- !query 164 schema -struct<(CAST(CAST(1 AS SMALLINT) AS STRING) IN (CAST(CAST(1 AS SMALLINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS SMALLINT) AS SMALLINT) IN (CAST(CAST(1 AS SMALLINT) AS SMALLINT), CAST(CAST(1 AS STRING) AS SMALLINT))):boolean> -- !query 164 output true @@ -1487,7 +1487,7 @@ true -- !query 176 SELECT cast(1 as int) in (cast(1 as int), cast(1 as string)) FROM t -- !query 176 schema -struct<(CAST(CAST(1 AS INT) AS STRING) IN (CAST(CAST(1 AS INT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS INT) AS INT), CAST(CAST(1 AS STRING) AS INT))):boolean> -- !query 176 output true @@ -1587,7 +1587,7 @@ true -- !query 188 SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as string)) FROM t -- !query 188 schema -struct<(CAST(CAST(1 AS BIGINT) AS STRING) IN (CAST(CAST(1 AS BIGINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT), CAST(CAST(1 AS STRING) AS BIGINT))):boolean> -- !query 188 output true @@ -1687,7 +1687,7 @@ true -- !query 200 SELECT cast(1 as float) in (cast(1 as float), cast(1 as string)) FROM t -- !query 200 schema -struct<(CAST(CAST(1 AS FLOAT) AS STRING) IN (CAST(CAST(1 AS FLOAT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), CAST(CAST(1 AS STRING) AS FLOAT))):boolean> -- !query 200 output true @@ -1787,7 +1787,7 @@ true -- !query 212 SELECT cast(1 as double) in (cast(1 as double), cast(1 as string)) FROM t -- !query 212 schema -struct<(CAST(CAST(1 AS DOUBLE) AS STRING) IN (CAST(CAST(1 AS DOUBLE) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> -- !query 212 output true @@ -1887,7 +1887,7 @@ true -- !query 224 SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as string)) FROM t -- !query 224 schema -struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS STRING) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> -- !query 224 output true @@ -1931,7 +1931,7 @@ cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST('20 -- !query 229 SELECT cast(1 as string) in (cast(1 as string), cast(1 as tinyint)) FROM t -- !query 229 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS TINYINT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS TINYINT) IN (CAST(CAST(1 AS STRING) AS TINYINT), CAST(CAST(1 AS TINYINT) AS TINYINT))):boolean> -- !query 229 output true @@ -1939,7 +1939,7 @@ true -- !query 230 SELECT cast(1 as string) in (cast(1 as string), cast(1 as smallint)) FROM t -- !query 230 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS SMALLINT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS SMALLINT) IN (CAST(CAST(1 AS STRING) AS SMALLINT), CAST(CAST(1 AS SMALLINT) AS SMALLINT))):boolean> -- !query 230 output true @@ -1947,7 +1947,7 @@ true -- !query 231 SELECT cast(1 as string) in (cast(1 as string), cast(1 as int)) FROM t -- !query 231 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS INT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS INT) IN (CAST(CAST(1 AS STRING) AS INT), CAST(CAST(1 AS INT) AS INT))):boolean> -- !query 231 output true @@ -1955,7 +1955,7 @@ true -- !query 232 SELECT cast(1 as string) in (cast(1 as string), cast(1 as bigint)) FROM t -- !query 232 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS BIGINT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS BIGINT) IN (CAST(CAST(1 AS STRING) AS BIGINT), CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> -- !query 232 output true @@ -1963,7 +1963,7 @@ true -- !query 233 SELECT cast(1 as string) in (cast(1 as string), cast(1 as float)) FROM t -- !query 233 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS FLOAT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS FLOAT) IN (CAST(CAST(1 AS STRING) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> -- !query 233 output true @@ -1971,7 +1971,7 @@ true -- !query 234 SELECT cast(1 as string) in (cast(1 as string), cast(1 as double)) FROM t -- !query 234 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS DOUBLE) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) IN (CAST(CAST(1 AS STRING) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> -- !query 234 output true @@ -1979,7 +1979,7 @@ true -- !query 235 SELECT cast(1 as string) in (cast(1 as string), cast(1 as decimal(10, 0))) FROM t -- !query 235 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS DECIMAL(10,0)) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) IN (CAST(CAST(1 AS STRING) AS DOUBLE), CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> -- !query 235 output true @@ -1995,19 +1995,17 @@ true -- !query 237 SELECT cast(1 as string) in (cast(1 as string), cast('1' as binary)) FROM t -- !query 237 schema -struct<> +struct<(CAST(CAST(1 AS STRING) AS BINARY) IN (CAST(CAST(1 AS STRING) AS BINARY), CAST(CAST(1 AS BINARY) AS BINARY))):boolean> -- !query 237 output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: string != binary; line 1 pos 25 +true -- !query 238 SELECT cast(1 as string) in (cast(1 as string), cast(1 as boolean)) FROM t -- !query 238 schema -struct<> +struct<(CAST(CAST(1 AS STRING) AS BOOLEAN) IN (CAST(CAST(1 AS STRING) AS BOOLEAN), CAST(CAST(1 AS BOOLEAN) AS BOOLEAN))):boolean> -- !query 238 output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: string != boolean; line 1 pos 25 +true -- !query 239 @@ -2092,10 +2090,9 @@ cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS DECIMAL( -- !query 248 SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as string)) FROM t -- !query 248 schema -struct<> +struct<(CAST(CAST(1 AS BINARY) AS BINARY) IN (CAST(CAST(1 AS BINARY) AS BINARY), CAST(CAST(1 AS STRING) AS BINARY))):boolean> -- !query 248 output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: binary != string; line 1 pos 27 +true -- !query 249 @@ -2199,10 +2196,9 @@ cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS DECIMA -- !query 260 SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as string)) FROM t -- !query 260 schema -struct<> +struct<(CAST(CAST(1 AS BOOLEAN) AS BOOLEAN) IN (CAST(CAST(1 AS BOOLEAN) AS BOOLEAN), CAST(CAST(1 AS STRING) AS BOOLEAN))):boolean> -- !query 260 output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: boolean != string; line 1 pos 28 +true -- !query 261 From cb25b788cfc3cd7799a6671713558a32969f6dff Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 10 Aug 2018 14:16:51 +0800 Subject: [PATCH 04/11] Fix test error. --- .../spark/sql/catalyst/analysis/TypeCoercionSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index d71bbb3227134..0e2f15208adae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1378,8 +1378,8 @@ class TypeCoercionSuite extends AnalysisTest { ) ruleTest(inConversion, In(Literal("a"), Seq(Literal(1), Literal("b"))), - In(Cast(Literal("a"), StringType), - Seq(Cast(Literal(1), StringType), Cast(Literal("b"), StringType))) + In(Cast(Literal("a"), IntegerType), + Seq(Cast(Literal(1), IntegerType), Cast(Literal("b"), IntegerType))) ) } From 4fd21436f50130fb493d155aa3566268e67c5963 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 12 Aug 2018 11:54:19 +0800 Subject: [PATCH 05/11] Fix --- .../sql/catalyst/analysis/TypeCoercion.scala | 19 +++++------ .../catalyst/analysis/TypeCoercionSuite.scala | 4 +-- .../typeCoercion/native/inConversion.sql.out | 34 +++++++++++-------- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 96ac80c48720c..c9a2177b43e56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -231,14 +231,13 @@ object TypeCoercion { }) } - private def findInCommonType(types: Seq[DataType], conf: SQLConf): Option[DataType] = { - val (stringTypes, nonStringTypes) = types.partition(hasStringType(_)) - (stringTypes.distinct ++ nonStringTypes).foldLeft[Option[DataType]](Some(NullType))((r, c) => - r match { - case Some(d) => findCommonTypeForBinaryComparison(d, c, conf) - .orElse(findWiderTypeWithoutStringPromotionForTwo(d, c)) - case _ => None - }) + private def findInCommonType( + valueType: DataType, listTypes: Seq[DataType], conf: SQLConf): Option[DataType] = { + findWiderCommonType(listTypes) match { + case Some(d) => findCommonTypeForBinaryComparison(valueType, d, conf) + .orElse(findWiderTypeWithoutStringPromotionForTwo(valueType, d)) + case _ => None + } } /** @@ -495,8 +494,8 @@ object TypeCoercion { i } - case i @ In(a, b) if b.exists(_.dataType != a.dataType) => - findInCommonType(i.children.map(_.dataType), conf) match { + case i @ In(value, list) if list.exists(_.dataType != value.dataType) => + findInCommonType(value.dataType, list.map(_.dataType), conf) match { case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) case None => i } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 0e2f15208adae..d71bbb3227134 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1378,8 +1378,8 @@ class TypeCoercionSuite extends AnalysisTest { ) ruleTest(inConversion, In(Literal("a"), Seq(Literal(1), Literal("b"))), - In(Cast(Literal("a"), IntegerType), - Seq(Cast(Literal(1), IntegerType), Cast(Literal("b"), IntegerType))) + In(Cast(Literal("a"), StringType), + Seq(Cast(Literal(1), StringType), Cast(Literal("b"), StringType))) ) } diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out index 9c7c5ba355c65..c7a22ea6ccad4 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out @@ -1931,7 +1931,7 @@ cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST('20 -- !query 229 SELECT cast(1 as string) in (cast(1 as string), cast(1 as tinyint)) FROM t -- !query 229 schema -struct<(CAST(CAST(1 AS STRING) AS TINYINT) IN (CAST(CAST(1 AS STRING) AS TINYINT), CAST(CAST(1 AS TINYINT) AS TINYINT))):boolean> +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS TINYINT) AS STRING))):boolean> -- !query 229 output true @@ -1939,7 +1939,7 @@ true -- !query 230 SELECT cast(1 as string) in (cast(1 as string), cast(1 as smallint)) FROM t -- !query 230 schema -struct<(CAST(CAST(1 AS STRING) AS SMALLINT) IN (CAST(CAST(1 AS STRING) AS SMALLINT), CAST(CAST(1 AS SMALLINT) AS SMALLINT))):boolean> +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS SMALLINT) AS STRING))):boolean> -- !query 230 output true @@ -1947,7 +1947,7 @@ true -- !query 231 SELECT cast(1 as string) in (cast(1 as string), cast(1 as int)) FROM t -- !query 231 schema -struct<(CAST(CAST(1 AS STRING) AS INT) IN (CAST(CAST(1 AS STRING) AS INT), CAST(CAST(1 AS INT) AS INT))):boolean> +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS INT) AS STRING))):boolean> -- !query 231 output true @@ -1955,7 +1955,7 @@ true -- !query 232 SELECT cast(1 as string) in (cast(1 as string), cast(1 as bigint)) FROM t -- !query 232 schema -struct<(CAST(CAST(1 AS STRING) AS BIGINT) IN (CAST(CAST(1 AS STRING) AS BIGINT), CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS BIGINT) AS STRING))):boolean> -- !query 232 output true @@ -1963,7 +1963,7 @@ true -- !query 233 SELECT cast(1 as string) in (cast(1 as string), cast(1 as float)) FROM t -- !query 233 schema -struct<(CAST(CAST(1 AS STRING) AS FLOAT) IN (CAST(CAST(1 AS STRING) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS FLOAT) AS STRING))):boolean> -- !query 233 output true @@ -1971,7 +1971,7 @@ true -- !query 234 SELECT cast(1 as string) in (cast(1 as string), cast(1 as double)) FROM t -- !query 234 schema -struct<(CAST(CAST(1 AS STRING) AS DOUBLE) IN (CAST(CAST(1 AS STRING) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS DOUBLE) AS STRING))):boolean> -- !query 234 output true @@ -1979,7 +1979,7 @@ true -- !query 235 SELECT cast(1 as string) in (cast(1 as string), cast(1 as decimal(10, 0))) FROM t -- !query 235 schema -struct<(CAST(CAST(1 AS STRING) AS DOUBLE) IN (CAST(CAST(1 AS STRING) AS DOUBLE), CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS DECIMAL(10,0)) AS STRING))):boolean> -- !query 235 output true @@ -1995,17 +1995,19 @@ true -- !query 237 SELECT cast(1 as string) in (cast(1 as string), cast('1' as binary)) FROM t -- !query 237 schema -struct<(CAST(CAST(1 AS STRING) AS BINARY) IN (CAST(CAST(1 AS STRING) AS BINARY), CAST(CAST(1 AS BINARY) AS BINARY))):boolean> +struct<> -- !query 237 output -true +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: string != binary; line 1 pos 25 -- !query 238 SELECT cast(1 as string) in (cast(1 as string), cast(1 as boolean)) FROM t -- !query 238 schema -struct<(CAST(CAST(1 AS STRING) AS BOOLEAN) IN (CAST(CAST(1 AS STRING) AS BOOLEAN), CAST(CAST(1 AS BOOLEAN) AS BOOLEAN))):boolean> +struct<> -- !query 238 output -true +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: string != boolean; line 1 pos 25 -- !query 239 @@ -2090,9 +2092,10 @@ cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS DECIMAL( -- !query 248 SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as string)) FROM t -- !query 248 schema -struct<(CAST(CAST(1 AS BINARY) AS BINARY) IN (CAST(CAST(1 AS BINARY) AS BINARY), CAST(CAST(1 AS STRING) AS BINARY))):boolean> +struct<> -- !query 248 output -true +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: binary != string; line 1 pos 27 -- !query 249 @@ -2196,9 +2199,10 @@ cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS DECIMA -- !query 260 SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as string)) FROM t -- !query 260 schema -struct<(CAST(CAST(1 AS BOOLEAN) AS BOOLEAN) IN (CAST(CAST(1 AS BOOLEAN) AS BOOLEAN), CAST(CAST(1 AS STRING) AS BOOLEAN))):boolean> +struct<> -- !query 260 output -true +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: boolean != string; line 1 pos 28 -- !query 261 From 20bdc954da598fe83e96ef55dbc783534d0668cf Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 14 Nov 2019 15:55:18 +0800 Subject: [PATCH 06/11] Merge master --- .../sql/catalyst/analysis/TypeCoercion.scala | 16 +++++---------- .../catalyst/analysis/TypeCoercionSuite.scala | 5 +++++ .../typeCoercion/native/inConversion.sql.out | 20 +++++++++---------- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 030ccb4974535..7d28cef5282d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -241,15 +241,6 @@ object TypeCoercion { }) } - private def findInCommonType( - valueType: DataType, listTypes: Seq[DataType], conf: SQLConf): Option[DataType] = { - findWiderCommonType(listTypes) match { - case Some(d) => findCommonTypeForBinaryComparison(valueType, d, conf) - .orElse(findWiderTypeWithoutStringPromotionForTwo(valueType, d)) - case _ => None - } - } - /** * Similar to [[findWiderTypeForTwo]] that can handle decimal types, but can't promote to * string. If the wider decimal type exceeds system limitation, this rule will truncate @@ -505,8 +496,11 @@ object TypeCoercion { } case i @ In(value, list) if list.exists(_.dataType != value.dataType) => - findInCommonType(value.dataType, list.map(_.dataType), conf) match { - case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) + findWiderCommonType(list.map(_.dataType)) match { + case Some(listType) => + val finalDataType = findCommonTypeForBinaryComparison(value.dataType, listType, conf) + .orElse(findWiderTypeWithoutStringPromotionForTwo(value.dataType, listType)) + finalDataType.map(t => i.withNewChildren(i.children.map(Cast(_, t)))).getOrElse(i) case None => i } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index c7371a7911df5..c6c95b34055d6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1459,6 +1459,11 @@ class TypeCoercionSuite extends AnalysisTest { In(Cast(Literal("a"), StringType), Seq(Cast(Literal(1), StringType), Cast(Literal("b"), StringType))) ) + ruleTest(inConversion, + In(Literal(Decimal(3.13)), Seq(Literal("1"), Literal(2))), + In(Cast(Decimal(3.13), DoubleType), + Seq(Cast(Literal("1"), DoubleType), Cast(Literal(2), DoubleType))) + ) } test("SPARK-15776 Divide expression's dataType should be casted to Double or Decimal " + diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out index c7a22ea6ccad4..e0a8f7c6179e8 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out @@ -793,17 +793,17 @@ true -- !query 95 SELECT cast(1 as string) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t -- !query 95 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS TIMESTAMP) IN (CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP))):boolean> -- !query 95 output -false +NULL -- !query 96 SELECT cast(1 as string) in (cast('2017-12-11 09:30:00' as date)) FROM t -- !query 96 schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS DATE) IN (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS DATE))):boolean> -- !query 96 output -false +NULL -- !query 97 @@ -1084,9 +1084,9 @@ cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS DECIMA -- !query 128 SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as string)) FROM t -- !query 128 schema -struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING) IN (CAST(CAST(2 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP) IN (CAST(CAST(2 AS STRING) AS TIMESTAMP))):boolean> -- !query 128 output -false +NULL -- !query 129 @@ -1189,9 +1189,9 @@ cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS DECIMAL(10,0) -- !query 140 SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as string)) FROM t -- !query 140 schema -struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING) IN (CAST(CAST(2 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS DATE) IN (CAST(CAST(2 AS STRING) AS DATE))):boolean> -- !query 140 output -false +NULL -- !query 141 @@ -2306,7 +2306,7 @@ cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 -- !query 272 SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as string)) FROM t -- !query 272 schema -struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING) IN (CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP) IN (CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP), CAST(CAST(1 AS STRING) AS TIMESTAMP))):boolean> -- !query 272 output true @@ -2411,7 +2411,7 @@ cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30: -- !query 284 SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as string)) FROM t -- !query 284 schema -struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING) IN (CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS DATE) IN (CAST(CAST(2017-12-12 09:30:00 AS DATE) AS DATE), CAST(CAST(1 AS STRING) AS DATE))):boolean> -- !query 284 output true From 87ab27ef9f26274c40fbd7a597e1dc13125b8a45 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 14 Nov 2019 17:20:06 +0800 Subject: [PATCH 07/11] Fix indentation --- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 7d28cef5282d0..980f3373294c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -499,7 +499,7 @@ object TypeCoercion { findWiderCommonType(list.map(_.dataType)) match { case Some(listType) => val finalDataType = findCommonTypeForBinaryComparison(value.dataType, listType, conf) - .orElse(findWiderTypeWithoutStringPromotionForTwo(value.dataType, listType)) + .orElse(findWiderTypeWithoutStringPromotionForTwo(value.dataType, listType)) finalDataType.map(t => i.withNewChildren(i.children.map(Cast(_, t)))).getOrElse(i) case None => i } From 80adb74df4f06b52808e2552f26be9a53313ae17 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 14 Nov 2019 18:41:50 +0800 Subject: [PATCH 08/11] findCommonTypeForBinaryComparison -> findWiderTypeForDecimal -> findTightestCommonType --- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 980f3373294c7..356053f279a6f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -499,7 +499,8 @@ object TypeCoercion { findWiderCommonType(list.map(_.dataType)) match { case Some(listType) => val finalDataType = findCommonTypeForBinaryComparison(value.dataType, listType, conf) - .orElse(findWiderTypeWithoutStringPromotionForTwo(value.dataType, listType)) + .orElse(findWiderTypeForDecimal(value.dataType, listType)) + .orElse(findTightestCommonType(value.dataType, listType)) finalDataType.map(t => i.withNewChildren(i.children.map(Cast(_, t)))).getOrElse(i) case None => i } From 232e42fd3192c500c44934855754773eaa2d5125 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 2 Feb 2020 19:32:18 +0800 Subject: [PATCH 09/11] Merge upstream --- .../typeCoercion/native/inConversion.sql.out | 90 +++++++++---------- 1 file changed, 43 insertions(+), 47 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out index 21d0a0e0fef4e..917e3b0a18b49 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out @@ -69,7 +69,7 @@ true -- !query SELECT cast(1 as tinyint) in (cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS TINYINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS TINYINT) AS TINYINT) IN (CAST(CAST(1 AS STRING) AS TINYINT))):boolean> -- !query output true @@ -169,7 +169,7 @@ true -- !query SELECT cast(1 as smallint) in (cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS SMALLINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS SMALLINT) AS SMALLINT) IN (CAST(CAST(1 AS STRING) AS SMALLINT))):boolean> -- !query output true @@ -269,7 +269,7 @@ true -- !query SELECT cast(1 as int) in (cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS INT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS STRING) AS INT))):boolean> -- !query output true @@ -369,7 +369,7 @@ true -- !query SELECT cast(1 as bigint) in (cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS BIGINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS STRING) AS BIGINT))):boolean> -- !query output true @@ -469,9 +469,9 @@ true -- !query SELECT cast(1 as float) in (cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS FLOAT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS STRING) AS FLOAT))):boolean> -- !query output -false +true -- !query @@ -569,9 +569,9 @@ true -- !query SELECT cast(1 as double) in (cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS DOUBLE) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> -- !query output -false +true -- !query @@ -669,7 +669,7 @@ true -- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> -- !query output true @@ -713,7 +713,7 @@ cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST('2017-12-11 09:30:00' AS DATE -- !query SELECT cast(1 as string) in (cast(1 as tinyint)) FROM t -- !query schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS TINYINT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS TINYINT) IN (CAST(CAST(1 AS TINYINT) AS TINYINT))):boolean> -- !query output true @@ -721,7 +721,7 @@ true -- !query SELECT cast(1 as string) in (cast(1 as smallint)) FROM t -- !query schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS SMALLINT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS SMALLINT) IN (CAST(CAST(1 AS SMALLINT) AS SMALLINT))):boolean> -- !query output true @@ -729,7 +729,7 @@ true -- !query SELECT cast(1 as string) in (cast(1 as int)) FROM t -- !query schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS INT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS INT) IN (CAST(CAST(1 AS INT) AS INT))):boolean> -- !query output true @@ -737,7 +737,7 @@ true -- !query SELECT cast(1 as string) in (cast(1 as bigint)) FROM t -- !query schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS BIGINT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> -- !query output true @@ -745,23 +745,23 @@ true -- !query SELECT cast(1 as string) in (cast(1 as float)) FROM t -- !query schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS FLOAT) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> -- !query output -false +true -- !query SELECT cast(1 as string) in (cast(1 as double)) FROM t -- !query schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS DOUBLE) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> -- !query output -false +true -- !query SELECT cast(1 as string) in (cast(1 as decimal(10, 0))) FROM t -- !query schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> -- !query output true @@ -777,35 +777,33 @@ true -- !query SELECT cast(1 as string) in (cast('1' as binary)) FROM t -- !query schema -struct<> +struct<(CAST(CAST(1 AS STRING) AS BINARY) IN (CAST(CAST(1 AS BINARY) AS BINARY))):boolean> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS STRING) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: string != binary; line 1 pos 25 +true -- !query SELECT cast(1 as string) in (cast(1 as boolean)) FROM t -- !query schema -struct<> +struct<(CAST(CAST(1 AS STRING) AS BOOLEAN) IN (CAST(CAST(1 AS BOOLEAN) AS BOOLEAN))):boolean> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: string != boolean; line 1 pos 25 +true -- !query SELECT cast(1 as string) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t -- !query schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS TIMESTAMP) IN (CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP))):boolean> -- !query output -false +NULL -- !query SELECT cast(1 as string) in (cast('2017-12-11 09:30:00' as date)) FROM t -- !query schema -struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):boolean> +struct<(CAST(CAST(1 AS STRING) AS DATE) IN (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS DATE))):boolean> -- !query output -false +NULL -- !query @@ -874,10 +872,9 @@ cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS DECIMAL(10,0))))' due to data -- !query SELECT cast('1' as binary) in (cast(1 as string)) FROM t -- !query schema -struct<> +struct<(CAST(CAST(1 AS BINARY) AS BINARY) IN (CAST(CAST(1 AS STRING) AS BINARY))):boolean> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: binary != string; line 1 pos 27 +true -- !query @@ -981,10 +978,9 @@ cannot resolve '(true IN (CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: -- !query SELECT true in (cast(1 as string)) FROM t -- !query schema -struct<> +struct<(CAST(true AS BOOLEAN) IN (CAST(CAST(1 AS STRING) AS BOOLEAN))):boolean> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(true IN (CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: boolean != string; line 1 pos 12 +true -- !query @@ -1088,9 +1084,9 @@ cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS DECIMA -- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as string)) FROM t -- !query schema -struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING) IN (CAST(CAST(2 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP) IN (CAST(CAST(2 AS STRING) AS TIMESTAMP))):boolean> -- !query output -false +NULL -- !query @@ -1193,9 +1189,9 @@ cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS DECIMAL(10,0) -- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as string)) FROM t -- !query schema -struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING) IN (CAST(CAST(2 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS DATE) IN (CAST(CAST(2 AS STRING) AS DATE))):boolean> -- !query output -false +NULL -- !query @@ -1291,7 +1287,7 @@ true -- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS TINYINT) AS STRING) IN (CAST(CAST(1 AS TINYINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS TINYINT) AS TINYINT) IN (CAST(CAST(1 AS TINYINT) AS TINYINT), CAST(CAST(1 AS STRING) AS TINYINT))):boolean> -- !query output true @@ -1391,7 +1387,7 @@ true -- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS SMALLINT) AS STRING) IN (CAST(CAST(1 AS SMALLINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS SMALLINT) AS SMALLINT) IN (CAST(CAST(1 AS SMALLINT) AS SMALLINT), CAST(CAST(1 AS STRING) AS SMALLINT))):boolean> -- !query output true @@ -1491,7 +1487,7 @@ true -- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS INT) AS STRING) IN (CAST(CAST(1 AS INT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS INT) AS INT), CAST(CAST(1 AS STRING) AS INT))):boolean> -- !query output true @@ -1591,7 +1587,7 @@ true -- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS BIGINT) AS STRING) IN (CAST(CAST(1 AS BIGINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT), CAST(CAST(1 AS STRING) AS BIGINT))):boolean> -- !query output true @@ -1691,7 +1687,7 @@ true -- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS FLOAT) AS STRING) IN (CAST(CAST(1 AS FLOAT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), CAST(CAST(1 AS STRING) AS FLOAT))):boolean> -- !query output true @@ -1791,7 +1787,7 @@ true -- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS DOUBLE) AS STRING) IN (CAST(CAST(1 AS DOUBLE) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> -- !query output true @@ -1891,7 +1887,7 @@ true -- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS STRING) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> -- !query output true @@ -2310,7 +2306,7 @@ cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 -- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING) IN (CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP) IN (CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP), CAST(CAST(1 AS STRING) AS TIMESTAMP))):boolean> -- !query output true @@ -2415,7 +2411,7 @@ cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30: -- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as string)) FROM t -- !query schema -struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING) IN (CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS DATE) IN (CAST(CAST(2017-12-12 09:30:00 AS DATE) AS DATE), CAST(CAST(1 AS STRING) AS DATE))):boolean> -- !query output true From b1958dd9a7a83ffb6576f04b13e210ae975f8263 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 22 Feb 2020 19:43:44 +0800 Subject: [PATCH 10/11] Add LEGACY_IN_PREDICATE_FOLLOW_BINARY_COMPARISON_TYPE_COERCION --- .../sql/catalyst/analysis/TypeCoercion.scala | 21 ++++++++++++------- .../apache/spark/sql/internal/SQLConf.scala | 7 +++++++ .../catalyst/analysis/TypeCoercionSuite.scala | 18 +++++++++++----- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index e034ca3bcd2e3..31a684f4fe2ab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -492,13 +492,20 @@ object TypeCoercion { } case i @ In(value, list) if list.exists(_.dataType != value.dataType) => - findWiderCommonType(list.map(_.dataType)) match { - case Some(listType) => - val finalDataType = findCommonTypeForBinaryComparison(value.dataType, listType, conf) - .orElse(findWiderTypeForDecimal(value.dataType, listType)) - .orElse(findTightestCommonType(value.dataType, listType)) - finalDataType.map(t => i.withNewChildren(i.children.map(Cast(_, t)))).getOrElse(i) - case None => i + if (conf.getConf(SQLConf.LEGACY_IN_PREDICATE_FOLLOW_BINARY_COMPARISON_TYPE_COERCION)) { + findWiderCommonType(list.map(_.dataType)) match { + case Some(listType) => + val finalDataType = findCommonTypeForBinaryComparison(value.dataType, listType, conf) + .orElse(findWiderTypeForDecimal(value.dataType, listType)) + .orElse(findTightestCommonType(value.dataType, listType)) + finalDataType.map(t => i.withNewChildren(i.children.map(Cast(_, t)))).getOrElse(i) + case None => i + } + } else { + findWiderCommonType(i.children.map(_.dataType)) match { + case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) + case None => i + } } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index faf9a3c7e1e39..2f80b17ede777 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2025,6 +2025,13 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_IN_PREDICATE_FOLLOW_BINARY_COMPARISON_TYPE_COERCION = + buildConf("spark.sql.legacy.inPredicateFollowBinaryComparisonTypeCoercion") + .internal() + .doc("When set to true, the in predicate follows binary comparison type coercion.") + .booleanConf + .createWithDefault(true) + val TRUNCATE_TABLE_IGNORE_PERMISSION_ACL = buildConf("spark.sql.truncateTable.ignorePermissionAcl.enabled") .internal() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 89c693c2baa78..add267fd7b458 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1421,11 +1421,19 @@ class TypeCoercionSuite extends AnalysisTest { In(Cast(Literal("a"), StringType), Seq(Cast(Literal(1), StringType), Cast(Literal("b"), StringType))) ) - ruleTest(inConversion, - In(Literal(Decimal(3.13)), Seq(Literal("1"), Literal(2))), - In(Cast(Decimal(3.13), DoubleType), - Seq(Cast(Literal("1"), DoubleType), Cast(Literal(2), DoubleType))) - ) + Seq(true, false).foreach { followBinaryComparison => + val converted = if (followBinaryComparison) { + In(Cast(Decimal(3.13), DoubleType), + Seq(Cast(Literal("1"), DoubleType), Cast(Literal(2), DoubleType))) + } else { + In(Cast(Decimal(3.13), StringType), + Seq(Cast(Literal("1"), StringType), Cast(Literal(2), StringType))) + } + withSQLConf(SQLConf.LEGACY_IN_PREDICATE_FOLLOW_BINARY_COMPARISON_TYPE_COERCION.key -> + followBinaryComparison.toString) { + ruleTest(inConversion, In(Literal(Decimal(3.13)), Seq(Literal("1"), Literal(2))), converted) + } + } } test("SPARK-15776 Divide expression's dataType should be casted to Double or Decimal " + From e60ff29c91ea03f6498c3e63e83f8dcf86285470 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 21 Mar 2020 23:46:10 +0800 Subject: [PATCH 11/11] Fix --- .../sql/catalyst/analysis/TypeCoercion.scala | 7 ---- .../catalyst/analysis/TypeCoercionSuite.scala | 37 ++++++++++++++++--- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index d125700e4f7cb..dd9024bf519df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -415,13 +415,6 @@ object TypeCoercion { if left.dataType != CalendarIntervalType => a.makeCopy(Array(left, Cast(right, DoubleType))) - // For equality between string and timestamp we cast the string to a timestamp - // so that things like rounding of subsecond precision does not affect the comparison. - case p @ Equality(left @ StringType(), right @ TimestampType()) => - p.makeCopy(Array(Cast(left, TimestampType), right)) - case p @ Equality(left @ TimestampType(), right @ StringType()) => - p.makeCopy(Array(left, Cast(right, TimestampType))) - case p @ BinaryComparison(left, right) if findCommonTypeForBinaryComparison(left.dataType, right.dataType, conf).isDefined => val commonType = findCommonTypeForBinaryComparison(left.dataType, right.dataType, conf).get diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index add267fd7b458..e573a51e6343b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.analysis -import java.sql.Timestamp +import java.sql.{Date, Timestamp} import org.apache.spark.sql.catalyst.analysis.TypeCoercion._ import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -1421,17 +1421,42 @@ class TypeCoercionSuite extends AnalysisTest { In(Cast(Literal("a"), StringType), Seq(Cast(Literal(1), StringType), Cast(Literal("b"), StringType))) ) - Seq(true, false).foreach { followBinaryComparison => - val converted = if (followBinaryComparison) { + Seq(true, false).foreach { follow => + val decimalCase = if (follow) { In(Cast(Decimal(3.13), DoubleType), Seq(Cast(Literal("1"), DoubleType), Cast(Literal(2), DoubleType))) } else { In(Cast(Decimal(3.13), StringType), Seq(Cast(Literal("1"), StringType), Cast(Literal(2), StringType))) } - withSQLConf(SQLConf.LEGACY_IN_PREDICATE_FOLLOW_BINARY_COMPARISON_TYPE_COERCION.key -> - followBinaryComparison.toString) { - ruleTest(inConversion, In(Literal(Decimal(3.13)), Seq(Literal("1"), Literal(2))), converted) + val dateCase = if (follow) { + In(Cast(Literal(Date.valueOf("2017-03-01")), DateType), + Seq(Cast(Literal("2017-03-01"), DateType))) + } else { + In(Cast(Literal(Date.valueOf("2017-03-01")), StringType), + Seq(Cast(Literal("2017-03-01"), StringType))) + } + val timestampCase = if (follow) { + In(Cast(Literal(new Timestamp(0)), TimestampType), + Seq(Cast(Literal("1"), TimestampType), Cast(Literal(2), TimestampType))) + } else { + In(Cast(Literal(new Timestamp(0)), StringType), + Seq(Cast(Literal("1"), StringType), Cast(Literal(2), StringType))) + } + withSQLConf( + SQLConf.LEGACY_IN_PREDICATE_FOLLOW_BINARY_COMPARISON_TYPE_COERCION.key -> s"$follow") { + ruleTest( + inConversion, + In(Literal(Decimal(3.13)), Seq(Literal("1"), Literal(2))), + decimalCase) + ruleTest( + inConversion, + In(Literal(Date.valueOf("2017-03-01")), Seq(Literal("2017-03-01"))), + dateCase) + ruleTest( + inConversion, + In(Literal(new Timestamp(0)), Seq(Literal("1"), Literal(2))), + timestampCase) } } }