Skip to content

Commit 272d229

Browse files
MaxGekk authored and cloud-fan committed
[SPARK-31361][SQL][TESTS][FOLLOWUP] Check non-vectorized Parquet reader while date/timestamp rebasing
### What changes were proposed in this pull request? In PR, I propose to modify two tests of `ParquetIOSuite`: - SPARK-31159: rebasing timestamps in write - SPARK-31159: rebasing dates in write to check non-vectorized Parquet reader together with vectorized reader. ### Why are the changes needed? To improve test coverage and make sure that non-vectorized reader behaves similar to the vectorized reader. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `PaquetIOSuite`: ``` $ ./build/sbt "test:testOnly *ParquetIOSuite" ``` Closes #28466 from MaxGekk/test-novec-rebase-ParquetIOSuite. Authored-by: Max Gekk <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent b31ae7b commit 272d229

File tree

1 file changed

+32
-22
lines changed

1 file changed

+32
-22
lines changed

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -952,18 +952,24 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
952952
.write
953953
.parquet(path)
954954
}
955-
// The file metadata indicates if it needs rebase or not, so we can always get the
956-
// correct result regardless of the "rebaseInRead" config.
957-
Seq(true, false).foreach { rebase =>
958-
withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
959-
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(tsStr)))
960-
}
961-
}
962955

963-
// Force to not rebase to prove the written datetime values are rebased and we will get
964-
// wrong result if we don't rebase while reading.
965-
withSQLConf("spark.test.forceNoRebase" -> "true") {
966-
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(nonRebased)))
956+
Seq(false, true).foreach { vectorized =>
957+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
958+
// The file metadata indicates if it needs rebase or not, so we can always get the
959+
// correct result regardless of the "rebaseInRead" config.
960+
Seq(true, false).foreach { rebase =>
961+
withSQLConf(
962+
SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
963+
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(tsStr)))
964+
}
965+
}
966+
967+
// Force to not rebase to prove the written datetime values are rebased
968+
// and we will get wrong result if we don't rebase while reading.
969+
withSQLConf("spark.test.forceNoRebase" -> "true") {
970+
checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(nonRebased)))
971+
}
972+
}
967973
}
968974
}
969975
}
@@ -981,18 +987,22 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
981987
.parquet(path)
982988
}
983989

984-
// The file metadata indicates if it needs rebase or not, so we can always get the correct
985-
// result regardless of the "rebaseInRead" config.
986-
Seq(true, false).foreach { rebase =>
987-
withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
988-
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-01")))
989-
}
990-
}
990+
Seq(false, true).foreach { vectorized =>
991+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
992+
// The file metadata indicates if it needs rebase or not, so we can always get the correct
993+
// result regardless of the "rebaseInRead" config.
994+
Seq(true, false).foreach { rebase =>
995+
withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
996+
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-01")))
997+
}
998+
}
991999

992-
// Force to not rebase to prove the written datetime values are rebased and we will get
993-
// wrong result if we don't rebase while reading.
994-
withSQLConf("spark.test.forceNoRebase" -> "true") {
995-
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-07")))
1000+
// Force to not rebase to prove the written datetime values are rebased and we will get
1001+
// wrong result if we don't rebase while reading.
1002+
withSQLConf("spark.test.forceNoRebase" -> "true") {
1003+
checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-07")))
1004+
}
1005+
}
9961006
}
9971007
}
9981008
}

0 commit comments

Comments (0)