diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt index 4fed51113912f..01b063993cd9c 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt @@ -6,48 +6,89 @@ OpenJDK 64-Bit Server VM 11.0.6+10-post-Ubuntu-1ubuntu118.04.1 on Linux 4.15.0-1 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save dates to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 9304 9304 0 10.7 93.0 1.0X -before 1582, noop 9187 9187 0 10.9 91.9 1.0X -after 1582, rebase off 22054 22054 0 4.5 220.5 0.4X -after 1582, rebase on 20361 20361 0 4.9 203.6 0.5X -before 1582, rebase off 20286 20286 0 4.9 202.9 0.5X -before 1582, rebase on 22230 22230 0 4.5 222.3 0.4X +after 1582, noop 9299 9299 0 10.8 93.0 1.0X +before 1582, noop 9220 9220 0 10.8 92.2 1.0X +after 1582, rebase off 20390 20390 0 4.9 203.9 0.5X +after 1582, rebase on 20378 20378 0 4.9 203.8 0.5X +before 1582, rebase off 20069 20069 0 5.0 200.7 0.5X +before 1582, rebase on 20637 20637 0 4.8 206.4 0.5X OpenJDK 64-Bit Server VM 11.0.6+10-post-Ubuntu-1ubuntu118.04.1 on Linux 4.15.0-1063-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load dates from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase off 12773 12866 129 7.8 127.7 1.0X -after 1582, vec off, rebase on 13063 13086 39 7.7 130.6 1.0X -after 1582, vec on, rebase off 3678 3719 61 27.2 36.8 3.5X -after 1582, vec on, rebase on 5078 5121 52 19.7 50.8 2.5X -before 1582, vec off, rebase off 12942 12972 42 7.7 129.4 1.0X -before 1582, vec off, rebase on 13866 13904 58 7.2 138.7 0.9X -before 1582, vec on, rebase off 3678 3711 43 27.2 36.8 3.5X -before 1582, vec on, rebase on 5621 5657 44 17.8 56.2 2.3X +after 1582, vec off, rebase off 12927 13017 78 7.7 129.3 1.0X +after 1582, vec off, rebase on 13127 13176 50 7.6 131.3 1.0X +after 1582, vec on, rebase off 3725 3779 91 26.8 37.3 3.5X +after 1582, vec on, rebase on 5134 5221 99 19.5 51.3 2.5X +before 1582, vec off, rebase off 13049 13061 16 7.7 130.5 1.0X +before 1582, vec off, rebase on 13877 13916 51 7.2 138.8 0.9X +before 1582, vec on, rebase off 3702 3736 56 27.0 37.0 3.5X +before 1582, vec on, rebase on 5567 5637 78 18.0 55.7 2.3X OpenJDK 64-Bit Server VM 11.0.6+10-post-Ubuntu-1ubuntu118.04.1 on Linux 4.15.0-1063-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save timestamps to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 2983 2983 0 33.5 29.8 1.0X -before 1582, noop 2979 2979 0 33.6 29.8 1.0X -after 1582, rebase off 17452 17452 0 5.7 174.5 0.2X -after 1582, rebase on 70193 70193 0 1.4 701.9 0.0X -before 1582, rebase off 17784 17784 0 5.6 177.8 0.2X -before 1582, rebase on 83498 83498 0 1.2 835.0 0.0X +after 1582, noop 2988 2988 0 33.5 29.9 1.0X +before 1582, noop 3000 3000 0 33.3 30.0 1.0X +after 1582, rebase off 16163 16163 0 6.2 161.6 0.2X +after 1582, rebase on 68399 68399 0 1.5 684.0 0.0X +before 1582, rebase off 16921 16921 0 5.9 169.2 0.2X +before 1582, rebase on 74425 74425 0 1.3 744.3 0.0X OpenJDK 64-Bit Server VM 11.0.6+10-post-Ubuntu-1ubuntu118.04.1 on Linux 4.15.0-1063-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load timestamps from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase off 15114 15151 32 6.6 151.1 1.0X -after 1582, vec off, rebase on 45804 45912 126 2.2 458.0 0.3X -after 1582, vec on, rebase off 4900 4947 56 20.4 49.0 3.1X -after 1582, vec on, rebase on 34599 34650 45 2.9 346.0 0.4X -before 1582, vec off, rebase off 15093 15174 70 6.6 150.9 1.0X -before 1582, vec off, rebase on 47367 47472 121 2.1 473.7 0.3X -before 1582, vec on, rebase off 4884 4952 80 20.5 48.8 3.1X -before 1582, vec on, rebase on 35831 35883 59 2.8 358.3 0.4X +after 1582, vec off, rebase off 15147 15258 97 6.6 151.5 1.0X +after 1582, vec off, rebase on 45035 45101 60 2.2 450.3 0.3X +after 1582, vec on, rebase off 4934 5012 100 20.3 49.3 3.1X +after 1582, vec on, rebase on 34263 34360 88 2.9 342.6 0.4X +before 1582, vec off, rebase off 15177 15220 37 6.6 151.8 1.0X +before 1582, vec off, rebase on 46754 46761 12 2.1 467.5 0.3X +before 1582, vec on, rebase off 4892 4956 61 20.4 48.9 3.1X +before 1582, vec on, rebase on 35989 36014 22 2.8 359.9 0.4X + + +================================================================================================ +Rebasing dates/timestamps in ORC datasource +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.6+10-post-Ubuntu-1ubuntu118.04.1 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Save dates to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +after 1582, noop 9295 9295 0 10.8 93.0 1.0X +before 1582, noop 9352 9352 0 10.7 93.5 1.0X +after 1582 17112 17112 0 5.8 171.1 0.5X +before 1582 17979 17979 0 5.6 179.8 0.5X + +OpenJDK 64-Bit Server VM 11.0.6+10-post-Ubuntu-1ubuntu118.04.1 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Load dates from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +after 1582, vec off 20874 20905 38 4.8 208.7 1.0X +after 1582, vec on 3813 3844 28 26.2 38.1 5.5X +before 1582, vec off 25912 25949 38 3.9 259.1 0.8X +before 1582, vec on 4322 4343 19 23.1 43.2 4.8X + +OpenJDK 64-Bit Server VM 11.0.6+10-post-Ubuntu-1ubuntu118.04.1 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Save timestamps to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +after 1582, noop 3003 3003 0 33.3 30.0 1.0X +before 1582, noop 3012 3012 0 33.2 30.1 1.0X +after 1582 41031 41031 0 2.4 410.3 0.1X +before 1582 44436 44436 0 2.3 444.4 0.1X + +OpenJDK 64-Bit Server VM 11.0.6+10-post-Ubuntu-1ubuntu118.04.1 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Load timestamps from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +after 1582, vec off 28477 28582 92 3.5 284.8 1.0X +after 1582, vec on 20754 20924 237 4.8 207.5 1.4X +before 1582, vec off 32858 32921 58 3.0 328.6 0.9X +before 1582, vec on 25734 25769 30 3.9 257.3 1.1X diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt index ee486276653fe..b35301349a1ee 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt @@ -6,48 +6,89 @@ OpenJDK 64-Bit Server VM 1.8.0_242-8u242-b08-0ubuntu3~18.04-b08 on Linux 4.15.0- Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save dates to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 9582 9582 0 10.4 95.8 1.0X -before 1582, noop 9473 9473 0 10.6 94.7 1.0X -after 1582, rebase off 21431 21431 0 4.7 214.3 0.4X -after 1582, rebase on 22156 22156 0 4.5 221.6 0.4X -before 1582, rebase off 21399 21399 0 4.7 214.0 0.4X -before 1582, rebase on 22927 22927 0 4.4 229.3 0.4X +after 1582, noop 9691 9691 0 10.3 96.9 1.0X +before 1582, noop 9024 9024 0 11.1 90.2 1.1X +after 1582, rebase off 21195 21195 0 4.7 211.9 0.5X +after 1582, rebase on 20045 20045 0 5.0 200.4 0.5X +before 1582, rebase off 20039 20039 0 5.0 200.4 0.5X +before 1582, rebase on 20451 20451 0 4.9 204.5 0.5X OpenJDK 64-Bit Server VM 1.8.0_242-8u242-b08-0ubuntu3~18.04-b08 on Linux 4.15.0-1063-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load dates from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase off 12637 12736 111 7.9 126.4 1.0X -after 1582, vec off, rebase on 13463 13531 61 7.4 134.6 0.9X -after 1582, vec on, rebase off 3693 3703 8 27.1 36.9 3.4X -after 1582, vec on, rebase on 5242 5252 9 19.1 52.4 2.4X -before 1582, vec off, rebase off 13055 13169 126 7.7 130.5 1.0X -before 1582, vec off, rebase on 14067 14270 185 7.1 140.7 0.9X -before 1582, vec on, rebase off 3697 3702 7 27.1 37.0 3.4X -before 1582, vec on, rebase on 6058 6097 34 16.5 60.6 2.1X +after 1582, vec off, rebase off 13207 13339 116 7.6 132.1 1.0X +after 1582, vec off, rebase on 13408 13446 57 7.5 134.1 1.0X +after 1582, vec on, rebase off 3680 3712 39 27.2 36.8 3.6X +after 1582, vec on, rebase on 5229 5261 29 19.1 52.3 2.5X +before 1582, vec off, rebase off 13135 13164 25 7.6 131.4 1.0X +before 1582, vec off, rebase on 13946 14033 94 7.2 139.5 0.9X +before 1582, vec on, rebase off 3689 3726 49 27.1 36.9 3.6X +before 1582, vec on, rebase on 5679 5687 9 17.6 56.8 2.3X OpenJDK 64-Bit Server VM 1.8.0_242-8u242-b08-0ubuntu3~18.04-b08 on Linux 4.15.0-1063-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save timestamps to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 2713 2713 0 36.9 27.1 1.0X -before 1582, noop 2715 2715 0 36.8 27.2 1.0X -after 1582, rebase off 16768 16768 0 6.0 167.7 0.2X -after 1582, rebase on 82811 82811 0 1.2 828.1 0.0X -before 1582, rebase off 17052 17052 0 5.9 170.5 0.2X -before 1582, rebase on 95134 95134 0 1.1 951.3 0.0X +after 1582, noop 2720 2720 0 36.8 27.2 1.0X +before 1582, noop 2712 2712 0 36.9 27.1 1.0X +after 1582, rebase off 16626 16626 0 6.0 166.3 0.2X +after 1582, rebase on 85136 85136 0 1.2 851.4 0.0X +before 1582, rebase off 16855 16855 0 5.9 168.6 0.2X +before 1582, rebase on 106121 106121 0 0.9 1061.2 0.0X OpenJDK 64-Bit Server VM 1.8.0_242-8u242-b08-0ubuntu3~18.04-b08 on Linux 4.15.0-1063-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load timestamps from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase off 15200 15321 194 6.6 152.0 1.0X -after 1582, vec off, rebase on 63160 63337 177 1.6 631.6 0.2X -after 1582, vec on, rebase off 4891 4928 43 20.4 48.9 3.1X -after 1582, vec on, rebase on 45474 45484 10 2.2 454.7 0.3X -before 1582, vec off, rebase off 15203 15330 110 6.6 152.0 1.0X -before 1582, vec off, rebase on 65588 65664 73 1.5 655.9 0.2X -before 1582, vec on, rebase off 4844 4916 105 20.6 48.4 3.1X -before 1582, vec on, rebase on 47815 47943 162 2.1 478.2 0.3X +after 1582, vec off, rebase off 15198 15301 90 6.6 152.0 1.0X +after 1582, vec off, rebase on 55210 55370 140 1.8 552.1 0.3X +after 1582, vec on, rebase off 4859 4880 19 20.6 48.6 3.1X +after 1582, vec on, rebase on 44758 44824 85 2.2 447.6 0.3X +before 1582, vec off, rebase off 15206 15316 112 6.6 152.1 1.0X +before 1582, vec off, rebase on 60452 60588 222 1.7 604.5 0.3X +before 1582, vec on, rebase off 4892 4933 36 20.4 48.9 3.1X +before 1582, vec on, rebase on 46871 46950 82 2.1 468.7 0.3X + + +================================================================================================ +Rebasing dates/timestamps in ORC datasource +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_242-8u242-b08-0ubuntu3~18.04-b08 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Save dates to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +after 1582, noop 9102 9102 0 11.0 91.0 1.0X +before 1582, noop 9099 9099 0 11.0 91.0 1.0X +after 1582 17652 17652 0 5.7 176.5 0.5X +before 1582 18284 18284 0 5.5 182.8 0.5X + +OpenJDK 64-Bit Server VM 1.8.0_242-8u242-b08-0ubuntu3~18.04-b08 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Load dates from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +after 1582, vec off 25169 25215 48 4.0 251.7 1.0X +after 1582, vec on 3701 3717 16 27.0 37.0 6.8X +before 1582, vec off 26919 27045 182 3.7 269.2 0.9X +before 1582, vec on 4169 4192 31 24.0 41.7 6.0X + +OpenJDK 64-Bit Server VM 1.8.0_242-8u242-b08-0ubuntu3~18.04-b08 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Save timestamps to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +after 1582, noop 2906 2906 0 34.4 29.1 1.0X +before 1582, noop 2863 2863 0 34.9 28.6 1.0X +after 1582 48858 48858 0 2.0 488.6 0.1X +before 1582 50945 50945 0 2.0 509.5 0.1X + +OpenJDK 64-Bit Server VM 1.8.0_242-8u242-b08-0ubuntu3~18.04-b08 on Linux 4.15.0-1063-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Load timestamps from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +after 1582, vec off 40925 40955 26 2.4 409.2 1.0X +after 1582, vec on 31246 31404 164 3.2 312.5 1.3X +before 1582, vec off 44634 44680 40 2.2 446.3 0.9X +before 1582, vec on 35578 35834 282 2.8 355.8 1.2X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala index 48ceccc9002c0..6285461d93097 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.benchmark +import java.io.File import java.time.{LocalDate, LocalDateTime, LocalTime, ZoneOffset} import org.apache.spark.benchmark.Benchmark @@ -91,68 +92,116 @@ object DateTimeRebaseBenchmark extends SqlBasedBenchmark { } } + private def benchmarkInputs(benchmark: Benchmark, rowsNum: Int, dateTime: String): Unit = { + benchmark.addCase("after 1582, noop", 1) { _ => + genDF(rowsNum, dateTime, after1582 = true).noop() + } + benchmark.addCase("before 1582, noop", 1) { _ => + genDF(rowsNum, dateTime, after1582 = false).noop() + } + } + + private def flagToStr(flag: Boolean): String = { + if (flag) "on" else "off" + } + + private def caseName( + after1582: Boolean, + rebase: Option[Boolean] = None, + vec: Option[Boolean] = None): String = { + val period = if (after1582) "after" else "before" + val vecFlag = vec.map(flagToStr).map(flag => s", vec $flag").getOrElse("") + val rebaseFlag = rebase.map(flagToStr).map(flag => s", rebase $flag").getOrElse("") + s"$period 1582$vecFlag$rebaseFlag" + } + + private def getPath( + basePath: File, + dateTime: String, + after1582: Boolean, + rebase: Option[Boolean] = None): String = { + val period = if (after1582) "after" else "before" + val rebaseFlag = rebase.map(flagToStr).map(flag => s"_$flag").getOrElse("") + basePath.getAbsolutePath + s"/${dateTime}_${period}_1582$rebaseFlag" + } + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val rowsNum = 100000000 + withTempPath { path => runBenchmark("Rebasing dates/timestamps in Parquet datasource") { - val rowsNum = 100000000 Seq("date", "timestamp").foreach { dateTime => val benchmark = new Benchmark(s"Save ${dateTime}s to parquet", rowsNum, output = output) - benchmark.addCase("after 1582, noop", 1) { _ => - genDF(rowsNum, dateTime, after1582 = true).noop() - } - benchmark.addCase("before 1582, noop", 1) { _ => - genDF(rowsNum, dateTime, after1582 = false).noop() - } - - def save(after1582: Boolean, rebase: Boolean): Unit = { - val period = if (after1582) "after" else "before" - val rebaseFlag = if (rebase) "on" else "off" - val caseName = s"$period 1582, rebase $rebaseFlag" - benchmark.addCase(caseName, 1) { _ => - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_WRITE.key -> rebase.toString) { - val df = genDF(rowsNum, dateTime, after1582) - val pathToWrite = path.getAbsolutePath + s"/${dateTime}_${period}_1582_$rebaseFlag" - df.write - .mode("overwrite") - .format("parquet") - .save(pathToWrite) - } - } - } - + benchmarkInputs(benchmark, rowsNum, dateTime) Seq(true, false).foreach { after1582 => Seq(false, true).foreach { rebase => - save(after1582, rebase) + benchmark.addCase(caseName(after1582, Some(rebase)), 1) { _ => + withSQLConf( + SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_WRITE.key -> rebase.toString) { + genDF(rowsNum, dateTime, after1582) + .write + .mode("overwrite") + .format("parquet") + .save(getPath(path, dateTime, after1582, Some(rebase))) + } + } } } benchmark.run() val benchmark2 = new Benchmark( s"Load ${dateTime}s from parquet", rowsNum, output = output) - - def load(after1582: Boolean, vec: Boolean, rebase: Boolean): Unit = { - val period = if (after1582) "after" else "before" - val rebaseFlag = if (rebase) "on" else "off" - val vecFlag = if (vec) "on" else "off" - val caseName = s"$period 1582, vec $vecFlag, rebase $rebaseFlag" - benchmark2.addCase(caseName, 3) { _ => - withSQLConf( - SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vec.toString, - SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) { - val pathToRead = path.getAbsolutePath + s"/${dateTime}_${period}_1582_$rebaseFlag" - spark.read.format("parquet").load(pathToRead).noop() + Seq(true, false).foreach { after1582 => + Seq(false, true).foreach { vec => + Seq(false, true).foreach { rebase => + benchmark2.addCase(caseName(after1582, Some(rebase), Some(vec)), 3) { _ => + withSQLConf( + SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vec.toString, + SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) { + spark.read + .format("parquet") + .load(getPath(path, dateTime, after1582, Some(rebase))) + .noop() + } + } } } } + benchmark2.run() + } + } + } + + withTempPath { path => + runBenchmark("Rebasing dates/timestamps in ORC datasource") { + Seq("date", "timestamp").foreach { dateTime => + val benchmark = new Benchmark(s"Save ${dateTime}s to ORC", rowsNum, output = output) + benchmarkInputs(benchmark, rowsNum, dateTime) + Seq(true, false).foreach { after1582 => + benchmark.addCase(caseName(after1582), 1) { _ => + genDF(rowsNum, dateTime, after1582) + .write + .mode("overwrite") + .format("orc") + .save(getPath(path, dateTime, after1582)) + } + } + benchmark.run() + val benchmark2 = new Benchmark(s"Load ${dateTime}s from ORC", rowsNum, output = output) Seq(true, false).foreach { after1582 => Seq(false, true).foreach { vec => - Seq(false, true).foreach { rebase => - load(after1582, vec, rebase) + benchmark2.addCase(caseName(after1582, vec = Some(vec)), 3) { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> vec.toString) { + spark + .read + .format("orc") + .load(getPath(path, dateTime, after1582)) + .noop() + } } } } - benchmark2.run() } }