diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 5612e4f1453f..124b04fb2bed 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -47,6 +47,8 @@ license: |
 
   - In Spark 3.1, `IllegalArgumentException` is returned for the incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'`, which are invalid. In Spark 3.0, these literals result in `NULL`s.
 
   - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details.
+
+  - In Spark 3.1, loading and saving of timestamps from/to Parquet files fails if the timestamps are before 1900-01-01 00:00:00Z and are loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifted input timestamps due to rebasing between the Julian and Proleptic Gregorian calendars. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` and/or `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`.
 
 ## Upgrading from Spark SQL 3.0 to 3.0.1
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 3648615a1eae..65d976958ffd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2666,7 +2666,7 @@ object SQLConf {
       .stringConf
       .transform(_.toUpperCase(Locale.ROOT))
       .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString)
+      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
 
   val LEGACY_PARQUET_REBASE_MODE_IN_READ =
     buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead")
@@ -2696,7 +2696,7 @@ object SQLConf {
       .stringConf
       .transform(_.toUpperCase(Locale.ROOT))
       .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString)
+      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
 
   val LEGACY_AVRO_REBASE_MODE_IN_WRITE =
     buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 763f9315bfc5..24a1ba124e56 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -586,7 +586,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
     Seq(true, false).foreach { java8Api =>
       withSQLConf(
         SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString,
-        SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> "CORRECTED") {
+        SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> "CORRECTED",
+        SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> "CORRECTED") {
         // spark.sql.parquet.outputTimestampType = TIMESTAMP_MILLIS
         val millisData = Seq(
           "1000-06-14 08:28:53.123",
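For context, the user-facing effect of the new `EXCEPTION` defaults can be sketched as follows. This snippet is illustrative and not part of the patch: the object name and the `/tmp` paths are hypothetical, and it assumes a local `SparkSession` together with the config keys shown above.

```scala
import java.sql.Timestamp

import org.apache.spark.sql.SparkSession

// Hypothetical object and paths, for illustration only.
object Int96RebaseExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("int96-rebase-example")
      .getOrCreate()
    import spark.implicits._

    // Store timestamps as INT96, the physical type used by old
    // Hive/Impala writers and by Spark 2.x by default.
    spark.conf.set("spark.sql.parquet.outputTimestampType", "INT96")

    // A timestamp before 1900-01-01 00:00:00Z. Under the new EXCEPTION
    // default, writing it as INT96 fails instead of silently rebasing
    // between the Julian and Proleptic Gregorian calendars.
    val df = Seq(Timestamp.valueOf("1001-01-01 01:02:03")).toDF("ts")

    // Opt back into the Spark 3.0 behavior (rebase on write):
    spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInWrite", "LEGACY")
    df.write.mode("overwrite").parquet("/tmp/int96_legacy")

    // And the same on the read side, e.g. for files written by Spark 2.x:
    spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInRead", "LEGACY")
    spark.read.parquet("/tmp/int96_legacy").show(false)

    spark.stop()
  }
}
```

The `EXCEPTION` default is deliberately conservative: it makes users choose a rebase mode explicitly rather than silently changing the values they read or write.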
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 214f36a2df71..dac4e950a782 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -1022,7 +1022,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
       }
     }
     Seq(
-      "2_4_5" -> successInRead _,
+      "2_4_5" -> failInRead _,
       "2_4_6" -> successInRead _).foreach { case (version, checkDefaultRead) =>
       withAllParquetReaders {
         Seq("plain", "dict").foreach { enc =>
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 52dd2b34a0e9..db0e93787338 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -1513,26 +1513,27 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
     Seq(tbl, ext_tbl).foreach { tblName =>
       sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")
 
+      val expectedSize = 636
       // analyze table
       sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
       var tableStats = getTableStats(tblName)
-      assert(tableStats.sizeInBytes == 601)
+      assert(tableStats.sizeInBytes == expectedSize)
       assert(tableStats.rowCount.isEmpty)
 
       sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS")
       tableStats = getTableStats(tblName)
-      assert(tableStats.sizeInBytes == 601)
+      assert(tableStats.sizeInBytes == expectedSize)
       assert(tableStats.rowCount.get == 1)
 
       // analyze a single partition
       sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS NOSCAN")
       var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
-      assert(partStats.sizeInBytes == 601)
+      assert(partStats.sizeInBytes == expectedSize)
       assert(partStats.rowCount.isEmpty)
 
       sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS")
       partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
-      assert(partStats.sizeInBytes == 601)
+      assert(partStats.sizeInBytes == expectedSize)
       assert(partStats.rowCount.get == 1)
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
index cbea74103343..b65a00457c72 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
@@ -155,6 +155,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
       withSQLConf(
         SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString,
         SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString,
+        SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString,
         SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) {
         val dataGenerator = RandomDataGenerator.forType(
           dataType = dataType,
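The test updates above all follow one pattern: suites that round-trip ancient timestamps now pin the INT96 rebase mode to `CORRECTED`, so they keep passing under the new `EXCEPTION` default. Below is a minimal sketch of that pattern, assuming the `SQLTestUtils`/`SharedSparkSession` helpers; the suite and test names are hypothetical, and the raw string key is used for the read-side config rather than guessing at its `SQLConf` constant.

```scala
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession

// Hypothetical suite name; the pattern mirrors the test changes above.
class AncientInt96RoundTripSuite extends QueryTest with SharedSparkSession {
  import testImplicits._

  test("round-trip pre-1900 timestamps as INT96 with pinned rebase modes") {
    withSQLConf(
      SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> "INT96",
      SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> "CORRECTED",
      // Raw key for the read-side config introduced by this change.
      "spark.sql.legacy.parquet.int96RebaseModeInRead" -> "CORRECTED") {
      withTempPath { path =>
        val df = Seq(java.sql.Timestamp.valueOf("1001-01-01 01:02:03")).toDF("ts")
        df.write.parquet(path.getCanonicalPath)
        // With both modes pinned to CORRECTED, the value survives unchanged.
        checkAnswer(spark.read.parquet(path.getCanonicalPath), df)
      }
    }
  }
}
```

Pinning `CORRECTED` rather than `LEGACY` keeps the suites exercising the new Proleptic Gregorian path while avoiding the exception the new default would otherwise raise.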