docs/sql-migration-guide.md (2 additions, 0 deletions)

```diff
@@ -47,6 +47,8 @@ license: |
 - In Spark 3.1, `IllegalArgumentException` is returned for the incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'`, which are invalid. In Spark 3.0, these literals result in `NULL`s.
 
 - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details.
+
+- In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z and are loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing between the Julian and Proleptic Gregorian calendars. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` and/or `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`.
 
 ## Upgrading from Spark SQL 3.0 to 3.0.1
 
```
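For readers upgrading, here is a minimal sketch of restoring the pre-3.1 behavior described in the note above. The config keys are the ones named in the migration note; the session setup and file path are hypothetical:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  // Rebase ancient INT96 values from the hybrid Julian calendar on read,
  // as Spark 3.0 did, instead of failing with an exception.
  .config("spark.sql.legacy.parquet.int96RebaseModeInRead", "LEGACY")
  // Likewise rebase to the hybrid Julian calendar on write.
  .config("spark.sql.legacy.parquet.int96RebaseModeInWrite", "LEGACY")
  .getOrCreate()

// Timestamps before 1900-01-01 00:00:00Z now load without an error.
val df = spark.read.parquet("/tmp/ancient_ts.parquet")  // hypothetical path
```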
```diff
@@ -2666,7 +2666,7 @@ object SQLConf {
       .stringConf
       .transform(_.toUpperCase(Locale.ROOT))
       .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString)
+      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
 
   val LEGACY_PARQUET_REBASE_MODE_IN_READ =
     buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead")
@@ -2696,7 +2696,7 @@ object SQLConf {
       .stringConf
       .transform(_.toUpperCase(Locale.ROOT))
       .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString)
+      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
 
   val LEGACY_AVRO_REBASE_MODE_IN_WRITE =
     buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite")
```
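Both hunks flip the default of the new INT96 rebase configs from `LEGACY` to `EXCEPTION`. A simplified sketch of what one such definition looks like in full, assuming the surrounding `SQLConf` scope provides `buildConf` and `LegacyBehaviorPolicy`; the doc string is paraphrased, not the exact source:

```scala
import java.util.Locale

val LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ =
  buildConf("spark.sql.legacy.parquet.int96RebaseModeInRead")
    .doc("EXCEPTION (default): fail on ancient INT96 timestamps whose " +
      "calendar is ambiguous; LEGACY: rebase from the hybrid Julian " +
      "calendar; CORRECTED: read the values as-is.")
    .stringConf
    // Accept any casing of the policy name.
    .transform(_.toUpperCase(Locale.ROOT))
    // Only the LegacyBehaviorPolicy values (EXCEPTION, LEGACY, CORRECTED) are valid.
    .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
    // The change in this PR: fail by default instead of silently rebasing.
    .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
```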
```diff
@@ -586,7 +586,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSparkSession
     Seq(true, false).foreach { java8Api =>
       withSQLConf(
         SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString,
-        SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> "CORRECTED") {
+        SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> "CORRECTED",
+        SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> "CORRECTED") {
         // spark.sql.parquet.outputTimestampType = TIMESTAMP_MILLIS
         val millisData = Seq(
           "1000-06-14 08:28:53.123",
```
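The test now pins the new INT96 mode to CORRECTED as well, since with the EXCEPTION default any write of a pre-1900 timestamp as INT96 would abort. A sketch of that failure mode outside the test, assuming an active `spark` session; the output path is hypothetical:

```scala
import org.apache.spark.sql.functions.col
import spark.implicits._

// Write timestamps using the INT96 physical type.
spark.conf.set("spark.sql.parquet.outputTimestampType", "INT96")
val ancient = Seq("1000-06-14 08:28:53.123").toDF("s")
  .select(col("s").cast("timestamp").as("ts"))

// With the new EXCEPTION default, this write fails:
// ancient.write.parquet("/tmp/int96_out")

// Opting in explicitly, as the updated test does:
spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInWrite", "CORRECTED")
ancient.write.mode("overwrite").parquet("/tmp/int96_out")  // hypothetical path
```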
```diff
@@ -1022,7 +1022,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
       }
     }
     Seq(
-      "2_4_5" -> successInRead _,
+      "2_4_5" -> failInRead _,
       "2_4_6" -> successInRead _).foreach { case (version, checkDefaultRead) =>
       withAllParquetReaders {
         Seq("plain", "dict").foreach { enc =>
```

> **Member Author** (on the `failInRead` line): No info about the writer. We take the mode from the SQL config and fail by default.
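The author's note explains the flipped expectation: a file written by Spark 2.4.5 carries no writer metadata, so the reader cannot tell which calendar produced the INT96 values and, with the EXCEPTION default, refuses to guess. A hedged sketch of the user-facing escape hatch; the file name is illustrative, not the actual test resource:

```scala
// A Parquet file with ancient INT96 timestamps written by Spark 2.4.5.
val legacyFile = "/tmp/ts_written_by_2_4_5.snappy.parquet"  // hypothetical

// Default (EXCEPTION): reading ancient values throws an upgrade exception.
// spark.read.parquet(legacyFile).collect()

// If the file is known to come from Spark 2.x, rebase from the Julian calendar:
spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInRead", "LEGACY")
val df = spark.read.parquet(legacyFile)
```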
```diff
@@ -1513,26 +1513,27 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton
     Seq(tbl, ext_tbl).foreach { tblName =>
       sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")
 
+      val expectedSize = 636
       // analyze table
       sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
       var tableStats = getTableStats(tblName)
-      assert(tableStats.sizeInBytes == 601)
+      assert(tableStats.sizeInBytes == expectedSize)
       assert(tableStats.rowCount.isEmpty)
 
       sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS")
       tableStats = getTableStats(tblName)
-      assert(tableStats.sizeInBytes == 601)
+      assert(tableStats.sizeInBytes == expectedSize)
       assert(tableStats.rowCount.get == 1)
 
       // analyze a single partition
       sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS NOSCAN")
       var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
-      assert(partStats.sizeInBytes == 601)
+      assert(partStats.sizeInBytes == expectedSize)
       assert(partStats.rowCount.isEmpty)
 
       sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS")
       partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
-      assert(partStats.sizeInBytes == 601)
+      assert(partStats.sizeInBytes == expectedSize)
       assert(partStats.rowCount.get == 1)
     }
   }
```

> **Member Author** (on the `expectedSize` change): The size of the parquet files increased because we write metadata key `org.apache.spark.int96NoRebase`.
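The size bump from 601 to 636 bytes comes from the new footer key the author mentions. A sketch of verifying that key with the Parquet API, assuming parquet-hadoop on the classpath; the file path is hypothetical:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.util.HadoopInputFile

val input = HadoopInputFile.fromPath(
  new Path("/tmp/tbl/part-00000.parquet"), new Configuration())
val reader = ParquetFileReader.open(input)
try {
  // Key-value metadata from the file footer; per the review note, Spark 3.1
  // adds org.apache.spark.int96NoRebase when INT96 values were not rebased.
  val kv = reader.getFooter.getFileMetaData.getKeyValueMetaData
  println(kv.containsKey("org.apache.spark.int96NoRebase"))
} finally {
  reader.close()
}
```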
```diff
@@ -155,6 +155,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with TestHiveSingleton
     withSQLConf(
       SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString,
       SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString,
+      SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString,
       SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) {
       val dataGenerator = RandomDataGenerator.forType(
         dataType = dataType,
```