From ccdd21cfa75f8577b5f8093c8e0b1eba6aa2e055 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 30 Jun 2018 08:22:16 +0800 Subject: [PATCH 1/5] Improvement FilterPushdownBenchmark --- .../FilterPushdownBenchmark-results.txt | 515 ++++++++++++++++++ .../benchmark/FilterPushdownBenchmark.scala | 371 +++++-------- 2 files changed, 651 insertions(+), 235 deletions(-) create mode 100644 sql/core/benchmarks/FilterPushdownBenchmark-results.txt diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt new file mode 100644 index 000000000000..3dd116ebd2fd --- /dev/null +++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt @@ -0,0 +1,515 @@ +############################[ Pushdown for many distinct value case ]############################ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 0 string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7928 / 8019 2.0 504.0 1.0X +Parquet Vectorized (Pushdown) 260 / 277 60.4 16.6 30.4X +Native ORC Vectorized 7958 / 8510 2.0 506.0 1.0X +Native ORC Vectorized (Pushdown) 909 / 916 17.3 57.8 8.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 0 string row ('7864320' < value < '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 8039 / 8779 2.0 511.1 1.0X +Parquet Vectorized (Pushdown) 269 / 279 58.5 17.1 29.9X +Native ORC Vectorized 7224 / 7367 2.2 459.3 1.1X +Native ORC Vectorized (Pushdown) 927 / 953 17.0 59.0 8.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 string row 
(value = '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 8071 / 8164 1.9 513.2 1.0X +Parquet Vectorized (Pushdown) 257 / 268 61.3 16.3 31.5X +Native ORC Vectorized 7121 / 7240 2.2 452.8 1.1X +Native ORC Vectorized (Pushdown) 866 / 907 18.2 55.0 9.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 string row (value <=> '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7981 / 8097 2.0 507.4 1.0X +Parquet Vectorized (Pushdown) 251 / 268 62.6 16.0 31.8X +Native ORC Vectorized 7128 / 7221 2.2 453.2 1.1X +Native ORC Vectorized (Pushdown) 866 / 926 18.2 55.0 9.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 string row ('7864320' <= value <= '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 8032 / 8180 2.0 510.6 1.0X +Parquet Vectorized (Pushdown) 256 / 263 61.3 16.3 31.3X +Native ORC Vectorized 7234 / 7299 2.2 459.9 1.1X +Native ORC Vectorized (Pushdown) 889 / 917 17.7 56.5 9.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select all string rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 16913 / 16974 0.9 1075.3 1.0X +Parquet Vectorized (Pushdown) 16763 / 16853 0.9 1065.8 1.0X +Native ORC Vectorized 16546 / 16680 1.0 1052.0 1.0X +Native ORC Vectorized (Pushdown) 16796 / 16989 0.9 1067.9 1.0X + +Java HotSpot(TM) 64-Bit 
Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 0 int row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7213 / 7272 2.2 458.6 1.0X +Parquet Vectorized (Pushdown) 251 / 258 62.7 16.0 28.7X +Native ORC Vectorized 6490 / 7068 2.4 412.6 1.1X +Native ORC Vectorized (Pushdown) 880 / 910 17.9 56.0 8.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 0 int row (7864320 < value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7206 / 7316 2.2 458.1 1.0X +Parquet Vectorized (Pushdown) 264 / 270 59.5 16.8 27.3X +Native ORC Vectorized 6415 / 6454 2.5 407.8 1.1X +Native ORC Vectorized (Pushdown) 884 / 916 17.8 56.2 8.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 int row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7217 / 7354 2.2 458.8 1.0X +Parquet Vectorized (Pushdown) 257 / 268 61.1 16.4 28.0X +Native ORC Vectorized 6437 / 6572 2.4 409.3 1.1X +Native ORC Vectorized (Pushdown) 900 / 919 17.5 57.2 8.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 int row (value <=> 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7330 / 7461 2.1 466.0 1.0X +Parquet Vectorized (Pushdown) 255 / 268 61.6 16.2 28.7X +Native ORC Vectorized 6467 / 6525 2.4 411.1 1.1X +Native ORC Vectorized 
(Pushdown) 886 / 924 17.8 56.3 8.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 int row (7864320 <= value <= 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7128 / 7237 2.2 453.2 1.0X +Parquet Vectorized (Pushdown) 265 / 273 59.5 16.8 26.9X +Native ORC Vectorized 6471 / 6594 2.4 411.4 1.1X +Native ORC Vectorized (Pushdown) 872 / 884 18.0 55.4 8.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 int row (7864319 < value < 7864321): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7187 / 7318 2.2 456.9 1.0X +Parquet Vectorized (Pushdown) 249 / 266 63.1 15.9 28.8X +Native ORC Vectorized 6522 / 6577 2.4 414.7 1.1X +Native ORC Vectorized (Pushdown) 858 / 885 18.3 54.5 8.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 10% int rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 8165 / 8313 1.9 519.1 1.0X +Parquet Vectorized (Pushdown) 1795 / 1825 8.8 114.1 4.5X +Native ORC Vectorized 7420 / 7597 2.1 471.8 1.1X +Native ORC Vectorized (Pushdown) 2374 / 2437 6.6 150.9 3.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 50% int rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 11249 / 11496 1.4 715.2 1.0X +Parquet Vectorized (Pushdown) 7719 / 7874 
2.0 490.8 1.5X +Native ORC Vectorized 10676 / 10895 1.5 678.7 1.1X +Native ORC Vectorized (Pushdown) 8184 / 8294 1.9 520.3 1.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 90% int rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 14400 / 14492 1.1 915.5 1.0X +Parquet Vectorized (Pushdown) 13817 / 13927 1.1 878.5 1.0X +Native ORC Vectorized 14192 / 14247 1.1 902.3 1.0X +Native ORC Vectorized (Pushdown) 13963 / 14044 1.1 887.8 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select all int rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 15036 / 15229 1.0 956.0 1.0X +Parquet Vectorized (Pushdown) 15283 / 15367 1.0 971.7 1.0X +Native ORC Vectorized 14722 / 14863 1.1 936.0 1.0X +Native ORC Vectorized (Pushdown) 15217 / 15287 1.0 967.5 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select all int rows (value > -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 15316 / 15478 1.0 973.8 1.0X +Parquet Vectorized (Pushdown) 15115 / 15220 1.0 961.0 1.0X +Native ORC Vectorized 14869 / 14992 1.1 945.3 1.0X +Native ORC Vectorized (Pushdown) 15163 / 15373 1.0 964.0 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select all int rows (value != -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------ +Parquet Vectorized 15137 / 15239 1.0 962.4 1.0X +Parquet Vectorized (Pushdown) 15240 / 15350 1.0 968.9 1.0X +Native ORC Vectorized 14746 / 15074 1.1 937.5 1.0X +Native ORC Vectorized (Pushdown) 15089 / 15170 1.0 959.4 1.0X + +###############[ Pushdown for few distinct value case (use dictionary encoding) ]################ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 0 distinct string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 6965 / 7053 2.3 442.8 1.0X +Parquet Vectorized (Pushdown) 219 / 235 71.8 13.9 31.8X +Native ORC Vectorized 6444 / 6668 2.4 409.7 1.1X +Native ORC Vectorized (Pushdown) 862 / 893 18.2 54.8 8.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 0 distinct string row ('100' < value < '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7318 / 7366 2.1 465.3 1.0X +Parquet Vectorized (Pushdown) 219 / 230 71.8 13.9 33.4X +Native ORC Vectorized 6857 / 6945 2.3 435.9 1.1X +Native ORC Vectorized (Pushdown) 868 / 910 18.1 55.2 8.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 distinct string row (value = '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7281 / 7357 2.2 462.9 1.0X +Parquet Vectorized (Pushdown) 397 / 407 39.6 25.3 18.3X +Native ORC Vectorized 6819 / 6903 2.3 433.5 1.1X +Native ORC Vectorized (Pushdown) 1056 / 1081 14.9 67.2 6.9X 
+ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 distinct string row (value <=> '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7218 / 7342 2.2 458.9 1.0X +Parquet Vectorized (Pushdown) 390 / 401 40.3 24.8 18.5X +Native ORC Vectorized 6768 / 6830 2.3 430.3 1.1X +Native ORC Vectorized (Pushdown) 1044 / 1073 15.1 66.4 6.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 distinct string row ('100' <= value <= '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7501 / 7575 2.1 476.9 1.0X +Parquet Vectorized (Pushdown) 398 / 414 39.5 25.3 18.9X +Native ORC Vectorized 6964 / 7046 2.3 442.8 1.1X +Native ORC Vectorized (Pushdown) 1053 / 1100 14.9 67.0 7.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select all distinct string rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 16690 / 16840 0.9 1061.1 1.0X +Parquet Vectorized (Pushdown) 16688 / 16787 0.9 1061.0 1.0X +Native ORC Vectorized 16252 / 16518 1.0 1033.3 1.0X +Native ORC Vectorized (Pushdown) 16537 / 16948 1.0 1051.4 1.0X + +###########################[ Pushdown benchmark for StringStartsWith ]########################### +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +StringStartsWith filter: (value like '10%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------ +Parquet Vectorized 9599 / 11663 1.6 610.3 1.0X +Parquet Vectorized (Pushdown) 9774 / 10623 1.6 621.4 1.0X +Native ORC Vectorized 9179 / 9654 1.7 583.6 1.0X +Native ORC Vectorized (Pushdown) 9537 / 10270 1.6 606.4 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +StringStartsWith filter: (value like '1000%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7937 / 8133 2.0 504.6 1.0X +Parquet Vectorized (Pushdown) 7965 / 8032 2.0 506.4 1.0X +Native ORC Vectorized 7096 / 7191 2.2 451.1 1.1X +Native ORC Vectorized (Pushdown) 7512 / 7580 2.1 477.6 1.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +StringStartsWith filter: (value like '786432%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7958 / 7990 2.0 506.0 1.0X +Parquet Vectorized (Pushdown) 7913 / 7957 2.0 503.1 1.0X +Native ORC Vectorized 7103 / 7176 2.2 451.6 1.1X +Native ORC Vectorized (Pushdown) 7491 / 7538 2.1 476.3 1.1X + +###############################[ Pushdown benchmark for Decimal ]################################ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 decimal(9, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 3621 / 3686 4.3 230.2 1.0X +Parquet Vectorized (Pushdown) 3616 / 3666 4.3 229.9 1.0X +Native ORC Vectorized 3976 / 4067 4.0 252.8 0.9X +Native ORC Vectorized (Pushdown) 623 / 643 25.3 39.6 5.8X + 
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 10% decimal(9, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 4510 / 4640 3.5 286.7 1.0X +Parquet Vectorized (Pushdown) 4495 / 4575 3.5 285.8 1.0X +Native ORC Vectorized 4854 / 4946 3.2 308.6 0.9X +Native ORC Vectorized (Pushdown) 1889 / 1988 8.3 120.1 2.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 50% decimal(9, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7918 / 8018 2.0 503.4 1.0X +Parquet Vectorized (Pushdown) 7914 / 8027 2.0 503.2 1.0X +Native ORC Vectorized 8342 / 8512 1.9 530.3 0.9X +Native ORC Vectorized (Pushdown) 7224 / 7320 2.2 459.3 1.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 90% decimal(9, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 9301 / 9348 1.7 591.3 1.0X +Parquet Vectorized (Pushdown) 9239 / 9338 1.7 587.4 1.0X +Native ORC Vectorized 9741 / 9901 1.6 619.3 1.0X +Native ORC Vectorized (Pushdown) 9146 / 9335 1.7 581.5 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 decimal(18, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 3893 / 3947 4.0 247.5 1.0X +Parquet Vectorized (Pushdown) 3898 / 3936 4.0 247.8 
1.0X +Native ORC Vectorized 4728 / 4808 3.3 300.6 0.8X +Native ORC Vectorized (Pushdown) 777 / 810 20.2 49.4 5.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 10% decimal(18, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 4835 / 4857 3.3 307.4 1.0X +Parquet Vectorized (Pushdown) 4788 / 4863 3.3 304.4 1.0X +Native ORC Vectorized 5551 / 5664 2.8 352.9 0.9X +Native ORC Vectorized (Pushdown) 2074 / 2134 7.6 131.9 2.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 50% decimal(18, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 8243 / 8317 1.9 524.1 1.0X +Parquet Vectorized (Pushdown) 8277 / 8314 1.9 526.3 1.0X +Native ORC Vectorized 9190 / 9335 1.7 584.3 0.9X +Native ORC Vectorized (Pushdown) 7426 / 7507 2.1 472.1 1.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 90% decimal(18, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 11441 / 11561 1.4 727.4 1.0X +Parquet Vectorized (Pushdown) 11386 / 11666 1.4 723.9 1.0X +Native ORC Vectorized 12756 / 12920 1.2 811.0 0.9X +Native ORC Vectorized (Pushdown) 12638 / 12817 1.2 803.5 0.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 decimal(38, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------ +Parquet Vectorized 5344 / 5393 2.9 339.8 1.0X +Parquet Vectorized (Pushdown) 5239 / 5406 3.0 333.1 1.0X +Native ORC Vectorized 4675 / 4742 3.4 297.2 1.1X +Native ORC Vectorized (Pushdown) 801 / 836 19.6 50.9 6.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 10% decimal(38, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 6424 / 6507 2.4 408.4 1.0X +Parquet Vectorized (Pushdown) 6364 / 6465 2.5 404.6 1.0X +Native ORC Vectorized 5744 / 5847 2.7 365.2 1.1X +Native ORC Vectorized (Pushdown) 2221 / 2249 7.1 141.2 2.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 50% decimal(38, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 10524 / 10642 1.5 669.1 1.0X +Parquet Vectorized (Pushdown) 10510 / 10634 1.5 668.2 1.0X +Native ORC Vectorized 9796 / 9892 1.6 622.8 1.1X +Native ORC Vectorized (Pushdown) 8035 / 8160 2.0 510.9 1.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 90% decimal(38, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 14674 / 14807 1.1 933.0 1.0X +Parquet Vectorized (Pushdown) 14567 / 14703 1.1 926.1 1.0X +Native ORC Vectorized 13811 / 13906 1.1 878.1 1.1X +Native ORC Vectorized (Pushdown) 13785 / 13853 1.1 876.4 1.1X + +##########################[ Pushdown benchmark for InSet -> InFilters 
]########################## +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (1, 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7286 / 7365 2.2 463.2 1.0X +Parquet Vectorized (Pushdown) 7135 / 7327 2.2 453.6 1.0X +Native ORC Vectorized 6604 / 7531 2.4 419.9 1.1X +Native ORC Vectorized (Pushdown) 924 / 948 17.0 58.8 7.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (1, 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7086 / 7207 2.2 450.5 1.0X +Parquet Vectorized (Pushdown) 7166 / 7253 2.2 455.6 1.0X +Native ORC Vectorized 6569 / 6597 2.4 417.6 1.1X +Native ORC Vectorized (Pushdown) 900 / 932 17.5 57.2 7.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (1, 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7216 / 7295 2.2 458.8 1.0X +Parquet Vectorized (Pushdown) 7075 / 7180 2.2 449.8 1.0X +Native ORC Vectorized 6488 / 6591 2.4 412.5 1.1X +Native ORC Vectorized (Pushdown) 917 / 947 17.2 58.3 7.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (10, 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7225 / 7303 2.2 459.4 1.0X +Parquet Vectorized (Pushdown) 7278 / 7327 2.2 462.7 1.0X +Native ORC Vectorized 
6507 / 6585 2.4 413.7 1.1X +Native ORC Vectorized (Pushdown) 954 / 980 16.5 60.7 7.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (10, 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7148 / 7269 2.2 454.4 1.0X +Parquet Vectorized (Pushdown) 7149 / 7248 2.2 454.5 1.0X +Native ORC Vectorized 6530 / 6617 2.4 415.2 1.1X +Native ORC Vectorized (Pushdown) 946 / 968 16.6 60.2 7.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (10, 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7190 / 7247 2.2 457.1 1.0X +Parquet Vectorized (Pushdown) 7124 / 7261 2.2 452.9 1.0X +Native ORC Vectorized 6515 / 6604 2.4 414.2 1.1X +Native ORC Vectorized (Pushdown) 916 / 950 17.2 58.3 7.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (50, 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7442 / 7518 2.1 473.1 1.0X +Parquet Vectorized (Pushdown) 7471 / 7515 2.1 475.0 1.0X +Native ORC Vectorized 6722 / 6859 2.3 427.4 1.1X +Native ORC Vectorized (Pushdown) 959 / 1019 16.4 61.0 7.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (50, 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7451 / 7535 2.1 473.7 1.0X +Parquet Vectorized 
(Pushdown) 7369 / 7534 2.1 468.5 1.0X +Native ORC Vectorized 6854 / 6900 2.3 435.7 1.1X +Native ORC Vectorized (Pushdown) 1001 / 1049 15.7 63.7 7.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (50, 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7463 / 7573 2.1 474.5 1.0X +Parquet Vectorized (Pushdown) 7462 / 7572 2.1 474.4 1.0X +Native ORC Vectorized 6710 / 6803 2.3 426.6 1.1X +Native ORC Vectorized (Pushdown) 1011 / 1052 15.6 64.3 7.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (100, 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7322 / 7435 2.1 465.5 1.0X +Parquet Vectorized (Pushdown) 7477 / 7554 2.1 475.4 1.0X +Native ORC Vectorized 6667 / 6804 2.4 423.9 1.1X +Native ORC Vectorized (Pushdown) 1069 / 1103 14.7 68.0 6.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (100, 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 7458 / 7509 2.1 474.2 1.0X +Parquet Vectorized (Pushdown) 7426 / 7515 2.1 472.2 1.0X +Native ORC Vectorized 6660 / 6761 2.4 423.4 1.1X +Native ORC Vectorized (Pushdown) 1138 / 1180 13.8 72.3 6.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +InSet -> InFilters filter: (100, 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------ +Parquet Vectorized 7401 / 7613 2.1 470.5 1.0X +Parquet Vectorized (Pushdown) 7363 / 7498 2.1 468.1 1.0X +Native ORC Vectorized 6780 / 6848 2.3 431.0 1.1X +Native ORC Vectorized (Pushdown) 1129 / 1174 13.9 71.8 6.6X + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index 6d7c7de9a856..c0932d86987a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -17,25 +17,30 @@ package org.apache.spark.sql.execution.benchmark -import java.io.File +import java.io.{File, FileOutputStream, OutputStream} import scala.util.{Random, Try} +import org.scalatest.{BeforeAndAfterEachTestData, Suite, TestData} + import org.apache.spark.SparkConf +import org.apache.spark.SparkFunSuite import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.functions.monotonically_increasing_id import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{Decimal, DecimalType} import org.apache.spark.util.{Benchmark, Utils} - /** * Benchmark to measure read performance with Filter pushdown. * To run this: - * spark-submit --class + * build/sbt "sql/test-only *FilterPushdownBenchmark" + * + * Results will be written to "benchmarks/FilterPushdownBenchmark-results.txt". 
*/ -object FilterPushdownBenchmark { - val conf = new SparkConf() - .setAppName("FilterPushdownBenchmark") +class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfterEachTest { + private val conf = new SparkConf() + .setAppName(this.getClass.getSimpleName) // Since `spark.master` always exists, overrides this value .set("spark.master", "local[1]") .setIfMissing("spark.driver.memory", "3g") @@ -44,8 +49,33 @@ object FilterPushdownBenchmark { .setIfMissing("orc.compression", "snappy") .setIfMissing("spark.sql.parquet.compression.codec", "snappy") + private val numRows = 1024 * 1024 * 15 + private val width = 5 + private val mid = numRows / 2 + private val blockSize = 1048576 + private val spark = SparkSession.builder().config(conf).getOrCreate() + private var out: OutputStream = _ + + override def beforeAll() { + super.beforeAll() + out = new FileOutputStream(new File("benchmarks/FilterPushdownBenchmark-results.txt")) + } + + override def beforeEach(td: TestData) { + super.beforeEach(td) + val testName = "[ " + td.name + " ]" + val halfLength = (97 - testName.length) / 2 + val testHeader = (("".padTo(halfLength, '#') + testName).padTo(97, '#') + "\n").getBytes + out.write(testHeader) + } + + override def afterAll() { + super.afterAll() + out.close() + } + def withTempPath(f: File => Unit): Unit = { val path = Utils.createTempDir() path.delete() @@ -81,8 +111,7 @@ object FilterPushdownBenchmark { .withColumn("value", valueCol) .sort("value") - saveAsOrcTable(df, dir.getCanonicalPath + "/orc") - saveAsParquetTable(df, dir.getCanonicalPath + "/parquet") + saveAsTable(df, dir) } private def prepareStringDictTable( @@ -93,19 +122,22 @@ object FilterPushdownBenchmark { } val df = spark.range(numRows).selectExpr(selectExpr: _*).sort("value") - saveAsOrcTable(df, dir.getCanonicalPath + "/orc") - saveAsParquetTable(df, dir.getCanonicalPath + "/parquet") + saveAsTable(df, dir) } - private def saveAsOrcTable(df: DataFrame, dir: String): Unit = { - // To 
always turn on dictionary encoding, we set 1.0 at the threshold (the default is 0.8) - df.write.mode("overwrite").option("orc.dictionary.key.threshold", 1.0).orc(dir) - spark.read.orc(dir).createOrReplaceTempView("orcTable") - } + private def saveAsTable(df: DataFrame, dir: File): Unit = { + val orcPath = dir.getCanonicalPath + "/orc" + val parquetPath = dir.getCanonicalPath + "/parquet" - private def saveAsParquetTable(df: DataFrame, dir: String): Unit = { - df.write.mode("overwrite").parquet(dir) - spark.read.parquet(dir).createOrReplaceTempView("parquetTable") + // To always turn on dictionary encoding, we set 1.0 at the threshold (the default is 0.8) + df.write.mode("overwrite") + .option("orc.dictionary.key.threshold", 1.0) + .option("orc.stripe.size", blockSize).orc(orcPath) + spark.read.orc(orcPath).createOrReplaceTempView("orcTable") + + df.write.mode("overwrite") + .option("parquet.block.size", blockSize).parquet(parquetPath) + spark.read.parquet(parquetPath).createOrReplaceTempView("parquetTable") } def filterPushDownBenchmark( @@ -113,7 +145,7 @@ object FilterPushdownBenchmark { title: String, whereExpr: String, selectExpr: String = "*"): Unit = { - val benchmark = new Benchmark(title, values, minNumIters = 5) + val benchmark = new Benchmark(title, values, minNumIters = 5, output = Some(out)) Seq(false, true).foreach { pushDownEnabled => val name = s"Parquet Vectorized ${if (pushDownEnabled) s"(Pushdown)" else ""}" @@ -133,214 +165,6 @@ object FilterPushdownBenchmark { } } - /* - OpenJDK 64-Bit Server VM 1.8.0_171-b10 on Linux 4.14.33-51.37.amzn1.x86_64 - Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz - Select 0 string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 9201 / 9300 1.7 585.0 1.0X - Parquet Vectorized (Pushdown) 89 / 105 176.3 5.7 103.1X - Native ORC Vectorized 8886 / 8898 1.8 564.9 1.0X - Native ORC 
Vectorized (Pushdown) 110 / 128 143.4 7.0 83.9X - - - Select 0 string row - ('7864320' < value < '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 9336 / 9357 1.7 593.6 1.0X - Parquet Vectorized (Pushdown) 927 / 937 17.0 58.9 10.1X - Native ORC Vectorized 9026 / 9041 1.7 573.9 1.0X - Native ORC Vectorized (Pushdown) 257 / 272 61.1 16.4 36.3X - - - Select 1 string row (value = '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 9209 / 9223 1.7 585.5 1.0X - Parquet Vectorized (Pushdown) 908 / 925 17.3 57.7 10.1X - Native ORC Vectorized 8878 / 8904 1.8 564.4 1.0X - Native ORC Vectorized (Pushdown) 248 / 261 63.4 15.8 37.1X - - - Select 1 string row - (value <=> '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 9194 / 9216 1.7 584.5 1.0X - Parquet Vectorized (Pushdown) 899 / 908 17.5 57.2 10.2X - Native ORC Vectorized 8934 / 8962 1.8 568.0 1.0X - Native ORC Vectorized (Pushdown) 249 / 254 63.3 15.8 37.0X - - - Select 1 string row - ('7864320' <= value <= '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 9332 / 9351 1.7 593.3 1.0X - Parquet Vectorized (Pushdown) 915 / 934 17.2 58.2 10.2X - Native ORC Vectorized 9049 / 9057 1.7 575.3 1.0X - Native ORC Vectorized (Pushdown) 248 / 258 63.5 15.8 37.7X - - - Select all string rows - (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 20478 / 20497 0.8 1301.9 1.0X - Parquet 
Vectorized (Pushdown) 20461 / 20550 0.8 1300.9 1.0X - Native ORC Vectorized 27464 / 27482 0.6 1746.1 0.7X - Native ORC Vectorized (Pushdown) 27454 / 27488 0.6 1745.5 0.7X - - - Select 0 int row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8489 / 8519 1.9 539.7 1.0X - Parquet Vectorized (Pushdown) 64 / 69 246.1 4.1 132.8X - Native ORC Vectorized 8064 / 8099 2.0 512.7 1.1X - Native ORC Vectorized (Pushdown) 88 / 94 178.6 5.6 96.4X - - - Select 0 int row - (7864320 < value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8494 / 8514 1.9 540.0 1.0X - Parquet Vectorized (Pushdown) 835 / 840 18.8 53.1 10.2X - Native ORC Vectorized 8090 / 8106 1.9 514.4 1.0X - Native ORC Vectorized (Pushdown) 249 / 257 63.2 15.8 34.1X - - - Select 1 int row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8552 / 8560 1.8 543.7 1.0X - Parquet Vectorized (Pushdown) 837 / 841 18.8 53.2 10.2X - Native ORC Vectorized 8178 / 8188 1.9 519.9 1.0X - Native ORC Vectorized (Pushdown) 249 / 258 63.2 15.8 34.4X - - - Select 1 int row (value <=> 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8562 / 8580 1.8 544.3 1.0X - Parquet Vectorized (Pushdown) 833 / 836 18.9 53.0 10.3X - Native ORC Vectorized 8164 / 8185 1.9 519.0 1.0X - Native ORC Vectorized (Pushdown) 245 / 254 64.3 15.6 35.0X - - - Select 1 int row - (7864320 <= value <= 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - 
------------------------------------------------------------------------------------------------ - Parquet Vectorized 8540 / 8555 1.8 542.9 1.0X - Parquet Vectorized (Pushdown) 837 / 839 18.8 53.2 10.2X - Native ORC Vectorized 8182 / 8231 1.9 520.2 1.0X - Native ORC Vectorized (Pushdown) 250 / 259 62.9 15.9 34.1X - - - Select 1 int row - (7864319 < value < 7864321): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8535 / 8555 1.8 542.6 1.0X - Parquet Vectorized (Pushdown) 835 / 841 18.8 53.1 10.2X - Native ORC Vectorized 8159 / 8179 1.9 518.8 1.0X - Native ORC Vectorized (Pushdown) 244 / 250 64.5 15.5 35.0X - - - Select 10% int rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 9609 / 9634 1.6 610.9 1.0X - Parquet Vectorized (Pushdown) 2663 / 2672 5.9 169.3 3.6X - Native ORC Vectorized 9824 / 9850 1.6 624.6 1.0X - Native ORC Vectorized (Pushdown) 2717 / 2722 5.8 172.7 3.5X - - - Select 50% int rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 13592 / 13613 1.2 864.2 1.0X - Parquet Vectorized (Pushdown) 9720 / 9738 1.6 618.0 1.4X - Native ORC Vectorized 16366 / 16397 1.0 1040.5 0.8X - Native ORC Vectorized (Pushdown) 12437 / 12459 1.3 790.7 1.1X - - - Select 90% int rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 17580 / 17617 0.9 1117.7 1.0X - Parquet Vectorized (Pushdown) 16803 / 16827 0.9 1068.3 1.0X - Native ORC Vectorized 24169 / 24187 0.7 1536.6 0.7X - Native ORC Vectorized (Pushdown) 22147 / 22341 0.7 1408.1 0.8X - - - 
Select all int rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 18461 / 18491 0.9 1173.7 1.0X - Parquet Vectorized (Pushdown) 18466 / 18530 0.9 1174.1 1.0X - Native ORC Vectorized 24231 / 24270 0.6 1540.6 0.8X - Native ORC Vectorized (Pushdown) 24207 / 24304 0.6 1539.0 0.8X - - - Select all int rows (value > -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 18414 / 18453 0.9 1170.7 1.0X - Parquet Vectorized (Pushdown) 18435 / 18464 0.9 1172.1 1.0X - Native ORC Vectorized 24430 / 24454 0.6 1553.2 0.8X - Native ORC Vectorized (Pushdown) 24410 / 24465 0.6 1552.0 0.8X - - - Select all int rows (value != -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 18446 / 18457 0.9 1172.8 1.0X - Parquet Vectorized (Pushdown) 18428 / 18440 0.9 1171.6 1.0X - Native ORC Vectorized 24414 / 24450 0.6 1552.2 0.8X - Native ORC Vectorized (Pushdown) 24385 / 24472 0.6 1550.4 0.8X - - - Select 0 distinct string row - (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8322 / 8352 1.9 529.1 1.0X - Parquet Vectorized (Pushdown) 53 / 57 296.3 3.4 156.7X - Native ORC Vectorized 7903 / 7953 2.0 502.4 1.1X - Native ORC Vectorized (Pushdown) 80 / 82 197.2 5.1 104.3X - - - Select 0 distinct string row - ('100' < value < '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8712 / 8743 1.8 553.9 1.0X - Parquet Vectorized (Pushdown) 995 / 1030 15.8 63.3 8.8X - 
Native ORC Vectorized 8345 / 8362 1.9 530.6 1.0X - Native ORC Vectorized (Pushdown) 84 / 87 187.6 5.3 103.9X - - - Select 1 distinct string row - (value = '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8574 / 8610 1.8 545.1 1.0X - Parquet Vectorized (Pushdown) 1127 / 1135 14.0 71.6 7.6X - Native ORC Vectorized 8163 / 8181 1.9 519.0 1.1X - Native ORC Vectorized (Pushdown) 426 / 433 36.9 27.1 20.1X - - - Select 1 distinct string row - (value <=> '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8549 / 8568 1.8 543.5 1.0X - Parquet Vectorized (Pushdown) 1124 / 1131 14.0 71.4 7.6X - Native ORC Vectorized 8163 / 8210 1.9 519.0 1.0X - Native ORC Vectorized (Pushdown) 426 / 436 36.9 27.1 20.1X - - - Select 1 distinct string row - ('100' <= value <= '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 8889 / 8896 1.8 565.2 1.0X - Parquet Vectorized (Pushdown) 1161 / 1168 13.6 73.8 7.7X - Native ORC Vectorized 8519 / 8554 1.8 541.6 1.0X - Native ORC Vectorized (Pushdown) 430 / 437 36.6 27.3 20.7X - - - Select all distinct string rows - (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Parquet Vectorized 20433 / 20533 0.8 1299.1 1.0X - Parquet Vectorized (Pushdown) 20433 / 20456 0.8 1299.1 1.0X - Native ORC Vectorized 25435 / 25513 0.6 1617.1 0.8X - Native ORC Vectorized (Pushdown) 25435 / 25507 0.6 1617.1 0.8X - */ - benchmark.run() } @@ -408,11 +232,7 @@ object FilterPushdownBenchmark { } } - def main(args: Array[String]): Unit = { - val numRows = 1024 * 1024 * 15 - val width = 5 
- - // Pushdown for many distinct value case + ignore("Pushdown for many distinct value case") { withTempPath { dir => val mid = numRows / 2 @@ -427,16 +247,97 @@ object FilterPushdownBenchmark { } } } + } - // Pushdown for few distinct value case (use dictionary encoding) + ignore("Pushdown for few distinct value case (use dictionary encoding)") { withTempPath { dir => val numDistinctValues = 200 - val mid = numDistinctValues / 2 withTempTable("orcTable", "patquetTable") { prepareStringDictTable(dir, numRows, numDistinctValues, width) - runStringBenchmark(numRows, width, mid, "distinct string") + runStringBenchmark(numRows, width, numDistinctValues / 2, "distinct string") } } } + + ignore("Pushdown benchmark for StringStartsWith") { + withTempPath { dir => + val mid = numRows / 2 + withTempTable("orcTable", "patquetTable") { + prepareTable(dir, numRows, width, true) + Seq( + "value like '10%'", + "value like '1000%'", + s"value like '${mid.toString.substring(0, mid.toString.length - 1)}%'" + ).foreach { whereExpr => + val title = s"StringStartsWith filter: ($whereExpr)" + filterPushDownBenchmark(numRows, title, whereExpr) + } + } + } + } + + ignore("Pushdown benchmark for Decimal") { + withTempPath { dir => + val mid = numRows / 2 + + Seq( + s"decimal(${Decimal.MAX_INT_DIGITS}, 2)", + s"decimal(${Decimal.MAX_LONG_DIGITS}, 2)", + s"decimal(${DecimalType.MAX_PRECISION}, 2)" + ).foreach { dt => + val columns = (1 to width).map(i => s"CAST(id AS string) c$i") + val df = spark.range(numRows).selectExpr(columns: _*) + .withColumn("value", monotonically_increasing_id().cast(dt)) + withTempTable("orcTable", "patquetTable") { + saveAsTable(df, dir) + + Seq(s"value = $mid").foreach { whereExpr => + val title = s"$dt: " + + s"Select 1 $dt row ($whereExpr)".replace("value AND value", "value") + filterPushDownBenchmark(numRows, title, whereExpr) + } + + val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)") + Seq(10, 50, 90).foreach { percent => 
+ filterPushDownBenchmark( + numRows, + s"$dt: Select $percent% $dt rows (value < ${numRows * percent / 100})", + s"value < ${numRows * percent / 100}", + selectExpr + ) + } + } + } + } + } + + ignore("Pushdown benchmark for InSet -> InFilters") { + withTempPath { dir => + val mid = numRows / 2 + withTempTable("orcTable", "patquetTable") { + prepareTable(dir, numRows, width, false) + Seq(1, 10, 50, 100).foreach { number => + Seq(10, 50, 90).foreach { distribute => + val filter = + Range(0, number).map(r => scala.util.Random.nextInt(numRows * distribute / 100)) + val whereExpr = s"value in(${filter.mkString(",")})" + val title = s"InSet -> InFilters filter: ($number, $distribute)" + filterPushDownBenchmark(numRows, title, whereExpr) + } + } + } + } + } +} + +trait BenchmarkBeforeAndAfterEachTest extends BeforeAndAfterEachTestData { this: Suite => + + override def beforeEach(td: TestData) { + super.beforeEach(td) + } + + override def afterEach(td: TestData) { + super.afterEach(td) + } } From 616933e3759739dcdae2140f5c58b659c943ab7f Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 30 Jun 2018 12:29:27 +0800 Subject: [PATCH 2/5] Remove duplicate val mid = numRows / 2 --- .../FilterPushdownBenchmark-results.txt | 120 +++++++++--------- .../benchmark/FilterPushdownBenchmark.scala | 14 +- 2 files changed, 64 insertions(+), 70 deletions(-) diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt index 3dd116ebd2fd..8280ccddc98c 100644 --- a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt +++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt @@ -396,120 +396,120 @@ Native ORC Vectorized (Pushdown) 13785 / 13853 1.1 8 Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (1, 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 5, distribution: 10): Best/Avg Time(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7286 / 7365 2.2 463.2 1.0X -Parquet Vectorized (Pushdown) 7135 / 7327 2.2 453.6 1.0X -Native ORC Vectorized 6604 / 7531 2.4 419.9 1.1X -Native ORC Vectorized (Pushdown) 924 / 948 17.0 58.8 7.9X +Parquet Vectorized 7657 / 7918 2.1 486.8 1.0X +Parquet Vectorized (Pushdown) 7581 / 7658 2.1 482.0 1.0X +Native ORC Vectorized 7338 / 7682 2.1 466.6 1.0X +Native ORC Vectorized (Pushdown) 1345 / 1354 11.7 85.5 5.7X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (1, 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 5, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7086 / 7207 2.2 450.5 1.0X -Parquet Vectorized (Pushdown) 7166 / 7253 2.2 455.6 1.0X -Native ORC Vectorized 6569 / 6597 2.4 417.6 1.1X -Native ORC Vectorized (Pushdown) 900 / 932 17.5 57.2 7.9X +Parquet Vectorized 7647 / 7707 2.1 486.2 1.0X +Parquet Vectorized (Pushdown) 7600 / 7758 2.1 483.2 1.0X +Native ORC Vectorized 7376 / 8002 2.1 469.0 1.0X +Native ORC Vectorized (Pushdown) 1324 / 1359 11.9 84.2 5.8X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (1, 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 5, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7216 / 7295 2.2 458.8 1.0X -Parquet Vectorized (Pushdown) 7075 / 7180 2.2 449.8 1.0X -Native ORC Vectorized 6488 / 6591 2.4 412.5 1.1X -Native ORC Vectorized (Pushdown) 917 / 947 17.2 58.3 7.9X 
+Parquet Vectorized 7587 / 7642 2.1 482.3 1.0X +Parquet Vectorized (Pushdown) 7570 / 7631 2.1 481.3 1.0X +Native ORC Vectorized 7440 / 8116 2.1 473.0 1.0X +Native ORC Vectorized (Pushdown) 1325 / 1338 11.9 84.2 5.7X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (10, 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 10, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7225 / 7303 2.2 459.4 1.0X -Parquet Vectorized (Pushdown) 7278 / 7327 2.2 462.7 1.0X -Native ORC Vectorized 6507 / 6585 2.4 413.7 1.1X -Native ORC Vectorized (Pushdown) 954 / 980 16.5 60.7 7.6X +Parquet Vectorized 7613 / 8941 2.1 484.0 1.0X +Parquet Vectorized (Pushdown) 7657 / 7777 2.1 486.8 1.0X +Native ORC Vectorized 7422 / 8243 2.1 471.9 1.0X +Native ORC Vectorized (Pushdown) 1361 / 1463 11.6 86.6 5.6X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (10, 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 10, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7148 / 7269 2.2 454.4 1.0X -Parquet Vectorized (Pushdown) 7149 / 7248 2.2 454.5 1.0X -Native ORC Vectorized 6530 / 6617 2.4 415.2 1.1X -Native ORC Vectorized (Pushdown) 946 / 968 16.6 60.2 7.6X +Parquet Vectorized 7643 / 7719 2.1 485.9 1.0X +Parquet Vectorized (Pushdown) 7598 / 7644 2.1 483.1 1.0X +Native ORC Vectorized 7311 / 7404 2.2 464.8 1.0X +Native ORC Vectorized (Pushdown) 1279 / 1313 12.3 81.3 6.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> 
InFilters filter: (10, 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 10, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7190 / 7247 2.2 457.1 1.0X -Parquet Vectorized (Pushdown) 7124 / 7261 2.2 452.9 1.0X -Native ORC Vectorized 6515 / 6604 2.4 414.2 1.1X -Native ORC Vectorized (Pushdown) 916 / 950 17.2 58.3 7.8X +Parquet Vectorized 7580 / 7649 2.1 482.0 1.0X +Parquet Vectorized (Pushdown) 7553 / 7634 2.1 480.2 1.0X +Native ORC Vectorized 7477 / 7576 2.1 475.4 1.0X +Native ORC Vectorized (Pushdown) 1317 / 1361 11.9 83.7 5.8X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (50, 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 50, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7442 / 7518 2.1 473.1 1.0X -Parquet Vectorized (Pushdown) 7471 / 7515 2.1 475.0 1.0X -Native ORC Vectorized 6722 / 6859 2.3 427.4 1.1X -Native ORC Vectorized (Pushdown) 959 / 1019 16.4 61.0 7.8X +Parquet Vectorized 7783 / 7850 2.0 494.8 1.0X +Parquet Vectorized (Pushdown) 7866 / 7876 2.0 500.1 1.0X +Native ORC Vectorized 7633 / 7750 2.1 485.3 1.0X +Native ORC Vectorized (Pushdown) 1432 / 1457 11.0 91.0 5.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (50, 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 50, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7451 / 7535 2.1 473.7 1.0X -Parquet 
Vectorized (Pushdown) 7369 / 7534 2.1 468.5 1.0X -Native ORC Vectorized 6854 / 6900 2.3 435.7 1.1X -Native ORC Vectorized (Pushdown) 1001 / 1049 15.7 63.7 7.4X +Parquet Vectorized 7785 / 7854 2.0 495.0 1.0X +Parquet Vectorized (Pushdown) 7860 / 7874 2.0 499.7 1.0X +Native ORC Vectorized 7665 / 7712 2.1 487.3 1.0X +Native ORC Vectorized (Pushdown) 1457 / 1479 10.8 92.6 5.3X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (50, 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 50, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7463 / 7573 2.1 474.5 1.0X -Parquet Vectorized (Pushdown) 7462 / 7572 2.1 474.4 1.0X -Native ORC Vectorized 6710 / 6803 2.3 426.6 1.1X -Native ORC Vectorized (Pushdown) 1011 / 1052 15.6 64.3 7.4X +Parquet Vectorized 7868 / 7900 2.0 500.2 1.0X +Parquet Vectorized (Pushdown) 7832 / 7888 2.0 498.0 1.0X +Native ORC Vectorized 7678 / 7716 2.0 488.2 1.0X +Native ORC Vectorized (Pushdown) 1473 / 1540 10.7 93.6 5.3X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (100, 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 100, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7322 / 7435 2.1 465.5 1.0X -Parquet Vectorized (Pushdown) 7477 / 7554 2.1 475.4 1.0X -Native ORC Vectorized 6667 / 6804 2.4 423.9 1.1X -Native ORC Vectorized (Pushdown) 1069 / 1103 14.7 68.0 6.8X +Parquet Vectorized 7739 / 7761 2.0 492.0 1.0X +Parquet Vectorized (Pushdown) 7733 / 7778 2.0 491.6 1.0X +Native ORC Vectorized 7549 / 7596 2.1 480.0 1.0X +Native ORC 
Vectorized (Pushdown) 1536 / 1544 10.2 97.7 5.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (100, 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 100, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7458 / 7509 2.1 474.2 1.0X -Parquet Vectorized (Pushdown) 7426 / 7515 2.1 472.2 1.0X -Native ORC Vectorized 6660 / 6761 2.4 423.4 1.1X -Native ORC Vectorized (Pushdown) 1138 / 1180 13.8 72.3 6.6X +Parquet Vectorized 7794 / 7806 2.0 495.5 1.0X +Parquet Vectorized (Pushdown) 7799 / 7880 2.0 495.8 1.0X +Native ORC Vectorized 7576 / 7599 2.1 481.7 1.0X +Native ORC Vectorized (Pushdown) 1554 / 1576 10.1 98.8 5.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -InSet -> InFilters filter: (100, 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +InSet -> InFilters (values count: 100, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7401 / 7613 2.1 470.5 1.0X -Parquet Vectorized (Pushdown) 7363 / 7498 2.1 468.1 1.0X -Native ORC Vectorized 6780 / 6848 2.3 431.0 1.1X -Native ORC Vectorized (Pushdown) 1129 / 1174 13.9 71.8 6.6X +Parquet Vectorized 7778 / 7832 2.0 494.5 1.0X +Parquet Vectorized (Pushdown) 7772 / 7837 2.0 494.1 1.0X +Native ORC Vectorized 7615 / 7647 2.1 484.1 1.0X +Native ORC Vectorized (Pushdown) 1541 / 1577 10.2 98.0 5.0X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index c0932d86987a..c04b63955f9f 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -234,8 +234,6 @@ class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfter ignore("Pushdown for many distinct value case") { withTempPath { dir => - val mid = numRows / 2 - withTempTable("orcTable", "patquetTable") { Seq(true, false).foreach { useStringForValue => prepareTable(dir, numRows, width, useStringForValue) @@ -262,7 +260,6 @@ class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfter ignore("Pushdown benchmark for StringStartsWith") { withTempPath { dir => - val mid = numRows / 2 withTempTable("orcTable", "patquetTable") { prepareTable(dir, numRows, width, true) Seq( @@ -279,8 +276,6 @@ class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfter ignore("Pushdown benchmark for Decimal") { withTempPath { dir => - val mid = numRows / 2 - Seq( s"decimal(${Decimal.MAX_INT_DIGITS}, 2)", s"decimal(${Decimal.MAX_LONG_DIGITS}, 2)", @@ -314,15 +309,14 @@ class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfter ignore("Pushdown benchmark for InSet -> InFilters") { withTempPath { dir => - val mid = numRows / 2 withTempTable("orcTable", "patquetTable") { prepareTable(dir, numRows, width, false) - Seq(1, 10, 50, 100).foreach { number => - Seq(10, 50, 90).foreach { distribute => + Seq(5, 10, 50, 100).foreach { count => + Seq(10, 50, 90).foreach { distribution => val filter = - Range(0, number).map(r => scala.util.Random.nextInt(numRows * distribute / 100)) + Range(0, count).map(r => scala.util.Random.nextInt(numRows * distribution / 100)) val whereExpr = s"value in(${filter.mkString(",")})" - val title = s"InSet -> InFilters filter: ($number, $distribute)" + val title = s"InSet -> InFilters (values count: $count, distribution: $distribution)" filterPushDownBenchmark(numRows, title, 
whereExpr) } } From be5d2197d3d3ff06c5f80ed66d5a0bb20924e130 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 30 Jun 2018 17:43:46 +0800 Subject: [PATCH 3/5] SPARK-24638 already merged to master, update the StringStartsWith result --- .../FilterPushdownBenchmark-results.txt | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt index 8280ccddc98c..dac8b1b98f05 100644 --- a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt +++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt @@ -246,30 +246,30 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz StringStartsWith filter: (value like '10%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 9599 / 11663 1.6 610.3 1.0X -Parquet Vectorized (Pushdown) 9774 / 10623 1.6 621.4 1.0X -Native ORC Vectorized 9179 / 9654 1.7 583.6 1.0X -Native ORC Vectorized (Pushdown) 9537 / 10270 1.6 606.4 1.0X +Parquet Vectorized 10104 / 11125 1.6 642.4 1.0X +Parquet Vectorized (Pushdown) 3002 / 3608 5.2 190.8 3.4X +Native ORC Vectorized 9589 / 10454 1.6 609.7 1.1X +Native ORC Vectorized (Pushdown) 9798 / 10509 1.6 622.9 1.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz StringStartsWith filter: (value like '1000%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7937 / 8133 2.0 504.6 1.0X -Parquet Vectorized (Pushdown) 7965 / 8032 2.0 506.4 1.0X -Native ORC Vectorized 7096 / 7191 2.2 451.1 1.1X -Native ORC Vectorized (Pushdown) 7512 / 7580 2.1 477.6 1.1X +Parquet Vectorized 8437 / 8563 1.9 536.4 1.0X +Parquet Vectorized (Pushdown) 279 / 289 56.3 17.8 30.2X +Native ORC Vectorized 7354 / 7568 
2.1 467.5 1.1X +Native ORC Vectorized (Pushdown) 7730 / 7972 2.0 491.4 1.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz StringStartsWith filter: (value like '786432%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7958 / 7990 2.0 506.0 1.0X -Parquet Vectorized (Pushdown) 7913 / 7957 2.0 503.1 1.0X -Native ORC Vectorized 7103 / 7176 2.2 451.6 1.1X -Native ORC Vectorized (Pushdown) 7491 / 7538 2.1 476.3 1.1X +Parquet Vectorized 8290 / 8510 1.9 527.0 1.0X +Parquet Vectorized (Pushdown) 260 / 272 60.5 16.5 31.9X +Native ORC Vectorized 7361 / 7395 2.1 468.0 1.1X +Native ORC Vectorized (Pushdown) 7694 / 7811 2.0 489.2 1.1X ###############################[ Pushdown benchmark for Decimal ]################################ Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 From ec62e13b9034ca81002dffe07a5ffbd1b425240a Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 1 Jul 2018 03:25:59 +0800 Subject: [PATCH 4/5] Add tinyint benchmark. 
--- .../FilterPushdownBenchmark-results.txt | 43 ++++++++++++++++++- .../benchmark/FilterPushdownBenchmark.scala | 39 ++++++++++++++--- 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt index dac8b1b98f05..bc2a7ac6403e 100644 --- a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt +++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt @@ -271,7 +271,7 @@ Parquet Vectorized (Pushdown) 260 / 272 60.5 Native ORC Vectorized 7361 / 7395 2.1 468.0 1.1X Native ORC Vectorized (Pushdown) 7694 / 7811 2.0 489.2 1.1X -###############################[ Pushdown benchmark for Decimal ]################################ +###############################[ Pushdown benchmark for decimal ]################################ Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz @@ -513,3 +513,44 @@ Parquet Vectorized (Pushdown) 7772 / 7837 2.0 4 Native ORC Vectorized 7615 / 7647 2.1 484.1 1.0X Native ORC Vectorized (Pushdown) 1541 / 1577 10.2 98.0 5.0X +###############################[ Pushdown benchmark for tinyint ]################################ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 1 tinyint row (value = CAST(63 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 4477 / 4626 3.5 284.6 1.0X +Parquet Vectorized (Pushdown) 4329 / 4366 3.6 275.2 1.0X +Native ORC Vectorized 3584 / 3679 4.4 227.9 1.2X +Native ORC Vectorized (Pushdown) 641 / 986 24.5 40.8 7.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------ +Parquet Vectorized 5007 / 5511 3.1 318.3 1.0X +Parquet Vectorized (Pushdown) 5093 / 5159 3.1 323.8 1.0X +Native ORC Vectorized 4388 / 4844 3.6 279.0 1.1X +Native ORC Vectorized (Pushdown) 1753 / 2271 9.0 111.5 2.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 8766 / 10085 1.8 557.3 1.0X +Parquet Vectorized (Pushdown) 9732 / 11845 1.6 618.7 0.9X +Native ORC Vectorized 7782 / 10141 2.0 494.8 1.1X +Native ORC Vectorized (Pushdown) 6643 / 10009 2.4 422.4 1.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Parquet Vectorized 12071 / 16726 1.3 767.4 1.0X +Parquet Vectorized (Pushdown) 11776 / 12005 1.3 748.7 1.0X +Native ORC Vectorized 13914 / 17542 1.1 884.6 0.9X +Native ORC Vectorized (Pushdown) 11173 / 12826 1.4 710.4 1.1X + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index c04b63955f9f..c48182d2e51e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -28,7 +28,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.{DataFrame, SparkSession} import 
org.apache.spark.sql.functions.monotonically_increasing_id import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{Decimal, DecimalType} +import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType} import org.apache.spark.util.{Benchmark, Utils} /** @@ -274,7 +274,7 @@ class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfter } } - ignore("Pushdown benchmark for Decimal") { + ignore(s"Pushdown benchmark for ${DecimalType.simpleString}") { withTempPath { dir => Seq( s"decimal(${Decimal.MAX_INT_DIGITS}, 2)", @@ -288,8 +288,7 @@ class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfter saveAsTable(df, dir) Seq(s"value = $mid").foreach { whereExpr => - val title = s"$dt: " + - s"Select 1 $dt row ($whereExpr)".replace("value AND value", "value") + val title = s"Select 1 $dt row ($whereExpr)".replace("value AND value", "value") filterPushDownBenchmark(numRows, title, whereExpr) } @@ -297,7 +296,7 @@ class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfter Seq(10, 50, 90).foreach { percent => filterPushDownBenchmark( numRows, - s"$dt: Select $percent% $dt rows (value < ${numRows * percent / 100})", + s"Select $percent% $dt rows (value < ${numRows * percent / 100})", s"value < ${numRows * percent / 100}", selectExpr ) @@ -323,6 +322,36 @@ class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfter } } } + + ignore(s"Pushdown benchmark for ${ByteType.simpleString}") { + withTempPath { dir => + val columns = (1 to width).map(i => s"CAST(id AS string) c$i") + val df = spark.range(numRows).selectExpr(columns: _*) + .withColumn("value", (monotonically_increasing_id() % Byte.MaxValue).cast(ByteType)) + .orderBy("value") + withTempTable("orcTable", "parquetTable") { + saveAsTable(df, dir) + + Seq(s"value = CAST(${Byte.MaxValue / 2} AS ${ByteType.simpleString})") + .foreach { whereExpr => + val title = s"Select 1 ${ByteType.simpleString} row ($whereExpr)" + 
.replace("value AND value", "value") + filterPushDownBenchmark(numRows, title, whereExpr) + } + + val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)") + Seq(10, 50, 90).foreach { percent => + filterPushDownBenchmark( + numRows, + s"Select $percent% ${ByteType.simpleString} rows " + + s"(value < CAST(${Byte.MaxValue * percent / 100} AS ${ByteType.simpleString}))", + s"value < CAST(${Byte.MaxValue * percent / 100} AS ${ByteType.simpleString})", + selectExpr + ) + } + } + } + } } trait BenchmarkBeforeAndAfterEachTest extends BeforeAndAfterEachTestData { this: Suite => From 021f096485358df231d2181338f274e2202f6ff5 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 5 Jul 2018 19:29:43 +0800 Subject: [PATCH 5/5] update format --- .../FilterPushdownBenchmark-results.txt | 482 +++++++++--------- .../benchmark/FilterPushdownBenchmark.scala | 17 +- 2 files changed, 265 insertions(+), 234 deletions(-) diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt index bc2a7ac6403e..29fe4345d69d 100644 --- a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt +++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt @@ -1,556 +1,580 @@ -############################[ Pushdown for many distinct value case ]############################ +================================================================================================ +Pushdown for many distinct value case +================================================================================================ + Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 0 string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7928 / 8019 2.0 504.0 1.0X -Parquet Vectorized (Pushdown) 260 / 277 60.4 16.6 30.4X -Native ORC 
Vectorized 7958 / 8510 2.0 506.0 1.0X -Native ORC Vectorized (Pushdown) 909 / 916 17.3 57.8 8.7X +Parquet Vectorized 8970 / 9122 1.8 570.3 1.0X +Parquet Vectorized (Pushdown) 471 / 491 33.4 30.0 19.0X +Native ORC Vectorized 7661 / 7853 2.1 487.0 1.2X +Native ORC Vectorized (Pushdown) 1134 / 1161 13.9 72.1 7.9X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 0 string row ('7864320' < value < '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 8039 / 8779 2.0 511.1 1.0X -Parquet Vectorized (Pushdown) 269 / 279 58.5 17.1 29.9X -Native ORC Vectorized 7224 / 7367 2.2 459.3 1.1X -Native ORC Vectorized (Pushdown) 927 / 953 17.0 59.0 8.7X +Parquet Vectorized 9246 / 9297 1.7 587.8 1.0X +Parquet Vectorized (Pushdown) 480 / 488 32.8 30.5 19.3X +Native ORC Vectorized 7838 / 7850 2.0 498.3 1.2X +Native ORC Vectorized (Pushdown) 1054 / 1118 14.9 67.0 8.8X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 string row (value = '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 8071 / 8164 1.9 513.2 1.0X -Parquet Vectorized (Pushdown) 257 / 268 61.3 16.3 31.5X -Native ORC Vectorized 7121 / 7240 2.2 452.8 1.1X -Native ORC Vectorized (Pushdown) 866 / 907 18.2 55.0 9.3X +Parquet Vectorized 8989 / 9100 1.7 571.5 1.0X +Parquet Vectorized (Pushdown) 448 / 467 35.1 28.5 20.1X +Native ORC Vectorized 7680 / 7768 2.0 488.3 1.2X +Native ORC Vectorized (Pushdown) 1067 / 1118 14.7 67.8 8.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 string row (value <=> '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------ -Parquet Vectorized 7981 / 8097 2.0 507.4 1.0X -Parquet Vectorized (Pushdown) 251 / 268 62.6 16.0 31.8X -Native ORC Vectorized 7128 / 7221 2.2 453.2 1.1X -Native ORC Vectorized (Pushdown) 866 / 926 18.2 55.0 9.2X +Parquet Vectorized 9115 / 9266 1.7 579.5 1.0X +Parquet Vectorized (Pushdown) 466 / 492 33.7 29.7 19.5X +Native ORC Vectorized 7800 / 7914 2.0 495.9 1.2X +Native ORC Vectorized (Pushdown) 1075 / 1102 14.6 68.4 8.5X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 string row ('7864320' <= value <= '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 8032 / 8180 2.0 510.6 1.0X -Parquet Vectorized (Pushdown) 256 / 263 61.3 16.3 31.3X -Native ORC Vectorized 7234 / 7299 2.2 459.9 1.1X -Native ORC Vectorized (Pushdown) 889 / 917 17.7 56.5 9.0X +Parquet Vectorized 9099 / 9237 1.7 578.5 1.0X +Parquet Vectorized (Pushdown) 462 / 475 34.1 29.3 19.7X +Native ORC Vectorized 7847 / 7925 2.0 498.9 1.2X +Native ORC Vectorized (Pushdown) 1078 / 1114 14.6 68.5 8.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select all string rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 16913 / 16974 0.9 1075.3 1.0X -Parquet Vectorized (Pushdown) 16763 / 16853 0.9 1065.8 1.0X -Native ORC Vectorized 16546 / 16680 1.0 1052.0 1.0X -Native ORC Vectorized (Pushdown) 16796 / 16989 0.9 1067.9 1.0X +Parquet Vectorized 19303 / 19547 0.8 1227.3 1.0X +Parquet Vectorized (Pushdown) 19924 / 20089 0.8 1266.7 1.0X +Native ORC Vectorized 18725 / 19079 0.8 1190.5 1.0X +Native ORC Vectorized (Pushdown) 19310 / 
19492 0.8 1227.7 1.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 0 int row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7213 / 7272 2.2 458.6 1.0X -Parquet Vectorized (Pushdown) 251 / 258 62.7 16.0 28.7X -Native ORC Vectorized 6490 / 7068 2.4 412.6 1.1X -Native ORC Vectorized (Pushdown) 880 / 910 17.9 56.0 8.2X +Parquet Vectorized 8117 / 8323 1.9 516.1 1.0X +Parquet Vectorized (Pushdown) 484 / 494 32.5 30.8 16.8X +Native ORC Vectorized 6811 / 7036 2.3 433.0 1.2X +Native ORC Vectorized (Pushdown) 1061 / 1082 14.8 67.5 7.6X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 0 int row (7864320 < value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7206 / 7316 2.2 458.1 1.0X -Parquet Vectorized (Pushdown) 264 / 270 59.5 16.8 27.3X -Native ORC Vectorized 6415 / 6454 2.5 407.8 1.1X -Native ORC Vectorized (Pushdown) 884 / 916 17.8 56.2 8.2X +Parquet Vectorized 8105 / 8140 1.9 515.3 1.0X +Parquet Vectorized (Pushdown) 478 / 505 32.9 30.4 17.0X +Native ORC Vectorized 6914 / 7211 2.3 439.6 1.2X +Native ORC Vectorized (Pushdown) 1044 / 1064 15.1 66.4 7.8X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 int row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7217 / 7354 2.2 458.8 1.0X -Parquet Vectorized (Pushdown) 257 / 268 61.1 16.4 28.0X -Native ORC Vectorized 6437 / 6572 2.4 409.3 1.1X -Native ORC Vectorized (Pushdown) 900 / 919 17.5 57.2 8.0X +Parquet Vectorized 7983 
/ 8116 2.0 507.6 1.0X +Parquet Vectorized (Pushdown) 464 / 487 33.9 29.5 17.2X +Native ORC Vectorized 6703 / 6774 2.3 426.1 1.2X +Native ORC Vectorized (Pushdown) 1017 / 1058 15.5 64.6 7.9X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 int row (value <=> 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7330 / 7461 2.1 466.0 1.0X -Parquet Vectorized (Pushdown) 255 / 268 61.6 16.2 28.7X -Native ORC Vectorized 6467 / 6525 2.4 411.1 1.1X -Native ORC Vectorized (Pushdown) 886 / 924 17.8 56.3 8.3X +Parquet Vectorized 7942 / 7983 2.0 504.9 1.0X +Parquet Vectorized (Pushdown) 468 / 479 33.6 29.7 17.0X +Native ORC Vectorized 6677 / 6779 2.4 424.5 1.2X +Native ORC Vectorized (Pushdown) 1021 / 1068 15.4 64.9 7.8X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 int row (7864320 <= value <= 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7128 / 7237 2.2 453.2 1.0X -Parquet Vectorized (Pushdown) 265 / 273 59.5 16.8 26.9X -Native ORC Vectorized 6471 / 6594 2.4 411.4 1.1X -Native ORC Vectorized (Pushdown) 872 / 884 18.0 55.4 8.2X +Parquet Vectorized 7909 / 7958 2.0 502.8 1.0X +Parquet Vectorized (Pushdown) 485 / 494 32.4 30.8 16.3X +Native ORC Vectorized 6751 / 6846 2.3 429.2 1.2X +Native ORC Vectorized (Pushdown) 1043 / 1077 15.1 66.3 7.6X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 int row (7864319 < value < 7864321): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7187 / 7318 2.2 456.9 1.0X 
-Parquet Vectorized (Pushdown) 249 / 266 63.1 15.9 28.8X -Native ORC Vectorized 6522 / 6577 2.4 414.7 1.1X -Native ORC Vectorized (Pushdown) 858 / 885 18.3 54.5 8.4X +Parquet Vectorized 8010 / 8033 2.0 509.2 1.0X +Parquet Vectorized (Pushdown) 472 / 489 33.3 30.0 17.0X +Native ORC Vectorized 6655 / 6808 2.4 423.1 1.2X +Native ORC Vectorized (Pushdown) 1015 / 1067 15.5 64.5 7.9X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 10% int rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 8165 / 8313 1.9 519.1 1.0X -Parquet Vectorized (Pushdown) 1795 / 1825 8.8 114.1 4.5X -Native ORC Vectorized 7420 / 7597 2.1 471.8 1.1X -Native ORC Vectorized (Pushdown) 2374 / 2437 6.6 150.9 3.4X +Parquet Vectorized 8983 / 9035 1.8 571.1 1.0X +Parquet Vectorized (Pushdown) 2204 / 2231 7.1 140.1 4.1X +Native ORC Vectorized 7864 / 8011 2.0 500.0 1.1X +Native ORC Vectorized (Pushdown) 2674 / 2789 5.9 170.0 3.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 50% int rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 11249 / 11496 1.4 715.2 1.0X -Parquet Vectorized (Pushdown) 7719 / 7874 2.0 490.8 1.5X -Native ORC Vectorized 10676 / 10895 1.5 678.7 1.1X -Native ORC Vectorized (Pushdown) 8184 / 8294 1.9 520.3 1.4X +Parquet Vectorized 12723 / 12903 1.2 808.9 1.0X +Parquet Vectorized (Pushdown) 9112 / 9282 1.7 579.3 1.4X +Native ORC Vectorized 12090 / 12230 1.3 768.7 1.1X +Native ORC Vectorized (Pushdown) 9242 / 9372 1.7 587.6 1.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 90% int rows (value < 14155776): 
Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 14400 / 14492 1.1 915.5 1.0X -Parquet Vectorized (Pushdown) 13817 / 13927 1.1 878.5 1.0X -Native ORC Vectorized 14192 / 14247 1.1 902.3 1.0X -Native ORC Vectorized (Pushdown) 13963 / 14044 1.1 887.8 1.0X +Parquet Vectorized 16453 / 16678 1.0 1046.1 1.0X +Parquet Vectorized (Pushdown) 15997 / 16262 1.0 1017.0 1.0X +Native ORC Vectorized 16652 / 17070 0.9 1058.7 1.0X +Native ORC Vectorized (Pushdown) 15843 / 16112 1.0 1007.2 1.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select all int rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 15036 / 15229 1.0 956.0 1.0X -Parquet Vectorized (Pushdown) 15283 / 15367 1.0 971.7 1.0X -Native ORC Vectorized 14722 / 14863 1.1 936.0 1.0X -Native ORC Vectorized (Pushdown) 15217 / 15287 1.0 967.5 1.0X +Parquet Vectorized 17098 / 17254 0.9 1087.1 1.0X +Parquet Vectorized (Pushdown) 17302 / 17529 0.9 1100.1 1.0X +Native ORC Vectorized 16790 / 17098 0.9 1067.5 1.0X +Native ORC Vectorized (Pushdown) 17329 / 17914 0.9 1101.7 1.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select all int rows (value > -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 15316 / 15478 1.0 973.8 1.0X -Parquet Vectorized (Pushdown) 15115 / 15220 1.0 961.0 1.0X -Native ORC Vectorized 14869 / 14992 1.1 945.3 1.0X -Native ORC Vectorized (Pushdown) 15163 / 15373 1.0 964.0 1.0X +Parquet Vectorized 17088 / 17392 0.9 1086.4 1.0X +Parquet Vectorized (Pushdown) 17609 / 17863 0.9 1119.5 1.0X +Native ORC Vectorized 
18334 / 69831 0.9 1165.7 0.9X +Native ORC Vectorized (Pushdown) 17465 / 17629 0.9 1110.4 1.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select all int rows (value != -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 15137 / 15239 1.0 962.4 1.0X -Parquet Vectorized (Pushdown) 15240 / 15350 1.0 968.9 1.0X -Native ORC Vectorized 14746 / 15074 1.1 937.5 1.0X -Native ORC Vectorized (Pushdown) 15089 / 15170 1.0 959.4 1.0X +Parquet Vectorized 16903 / 17233 0.9 1074.6 1.0X +Parquet Vectorized (Pushdown) 16945 / 17032 0.9 1077.3 1.0X +Native ORC Vectorized 16377 / 16762 1.0 1041.2 1.0X +Native ORC Vectorized (Pushdown) 16950 / 17212 0.9 1077.7 1.0X + + +================================================================================================ +Pushdown for few distinct value case (use dictionary encoding) +================================================================================================ -###############[ Pushdown for few distinct value case (use dictionary encoding) ]################ Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 0 distinct string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 6965 / 7053 2.3 442.8 1.0X -Parquet Vectorized (Pushdown) 219 / 235 71.8 13.9 31.8X -Native ORC Vectorized 6444 / 6668 2.4 409.7 1.1X -Native ORC Vectorized (Pushdown) 862 / 893 18.2 54.8 8.1X +Parquet Vectorized 7245 / 7322 2.2 460.7 1.0X +Parquet Vectorized (Pushdown) 378 / 389 41.6 24.0 19.2X +Native ORC Vectorized 6720 / 6778 2.3 427.2 1.1X +Native ORC Vectorized (Pushdown) 1009 / 1032 15.6 64.2 7.2X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 
Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 0 distinct string row ('100' < value < '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7318 / 7366 2.1 465.3 1.0X -Parquet Vectorized (Pushdown) 219 / 230 71.8 13.9 33.4X -Native ORC Vectorized 6857 / 6945 2.3 435.9 1.1X -Native ORC Vectorized (Pushdown) 868 / 910 18.1 55.2 8.4X +Parquet Vectorized 7627 / 7795 2.1 484.9 1.0X +Parquet Vectorized (Pushdown) 384 / 406 41.0 24.4 19.9X +Native ORC Vectorized 6724 / 7824 2.3 427.5 1.1X +Native ORC Vectorized (Pushdown) 968 / 986 16.3 61.5 7.9X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 distinct string row (value = '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7281 / 7357 2.2 462.9 1.0X -Parquet Vectorized (Pushdown) 397 / 407 39.6 25.3 18.3X -Native ORC Vectorized 6819 / 6903 2.3 433.5 1.1X -Native ORC Vectorized (Pushdown) 1056 / 1081 14.9 67.2 6.9X +Parquet Vectorized 7157 / 7534 2.2 455.0 1.0X +Parquet Vectorized (Pushdown) 542 / 565 29.0 34.5 13.2X +Native ORC Vectorized 6716 / 7214 2.3 427.0 1.1X +Native ORC Vectorized (Pushdown) 1212 / 1288 13.0 77.0 5.9X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 distinct string row (value <=> '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7218 / 7342 2.2 458.9 1.0X -Parquet Vectorized (Pushdown) 390 / 401 40.3 24.8 18.5X -Native ORC Vectorized 6768 / 6830 2.3 430.3 1.1X -Native ORC Vectorized (Pushdown) 1044 / 1073 15.1 66.4 6.9X +Parquet Vectorized 7368 / 7552 2.1 468.4 1.0X +Parquet Vectorized (Pushdown) 
544 / 556 28.9 34.6 13.5X +Native ORC Vectorized 6740 / 6867 2.3 428.5 1.1X +Native ORC Vectorized (Pushdown) 1230 / 1426 12.8 78.2 6.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 distinct string row ('100' <= value <= '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7501 / 7575 2.1 476.9 1.0X -Parquet Vectorized (Pushdown) 398 / 414 39.5 25.3 18.9X -Native ORC Vectorized 6964 / 7046 2.3 442.8 1.1X -Native ORC Vectorized (Pushdown) 1053 / 1100 14.9 67.0 7.1X +Parquet Vectorized 7427 / 7734 2.1 472.2 1.0X +Parquet Vectorized (Pushdown) 556 / 568 28.3 35.4 13.3X +Native ORC Vectorized 6847 / 7059 2.3 435.3 1.1X +Native ORC Vectorized (Pushdown) 1226 / 1230 12.8 77.9 6.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select all distinct string rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 16690 / 16840 0.9 1061.1 1.0X -Parquet Vectorized (Pushdown) 16688 / 16787 0.9 1061.0 1.0X -Native ORC Vectorized 16252 / 16518 1.0 1033.3 1.0X -Native ORC Vectorized (Pushdown) 16537 / 16948 1.0 1051.4 1.0X +Parquet Vectorized 16998 / 17311 0.9 1080.7 1.0X +Parquet Vectorized (Pushdown) 16977 / 17250 0.9 1079.4 1.0X +Native ORC Vectorized 18447 / 19852 0.9 1172.8 0.9X +Native ORC Vectorized (Pushdown) 16614 / 17102 0.9 1056.3 1.0X + + +================================================================================================ +Pushdown benchmark for StringStartsWith +================================================================================================ -###########################[ Pushdown benchmark for StringStartsWith ]########################### Java 
HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz StringStartsWith filter: (value like '10%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 10104 / 11125 1.6 642.4 1.0X -Parquet Vectorized (Pushdown) 3002 / 3608 5.2 190.8 3.4X -Native ORC Vectorized 9589 / 10454 1.6 609.7 1.1X -Native ORC Vectorized (Pushdown) 9798 / 10509 1.6 622.9 1.0X +Parquet Vectorized 9705 / 10814 1.6 617.0 1.0X +Parquet Vectorized (Pushdown) 3086 / 3574 5.1 196.2 3.1X +Native ORC Vectorized 10094 / 10695 1.6 641.8 1.0X +Native ORC Vectorized (Pushdown) 9611 / 9999 1.6 611.0 1.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz StringStartsWith filter: (value like '1000%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 8437 / 8563 1.9 536.4 1.0X -Parquet Vectorized (Pushdown) 279 / 289 56.3 17.8 30.2X -Native ORC Vectorized 7354 / 7568 2.1 467.5 1.1X -Native ORC Vectorized (Pushdown) 7730 / 7972 2.0 491.4 1.1X +Parquet Vectorized 8016 / 8183 2.0 509.7 1.0X +Parquet Vectorized (Pushdown) 444 / 457 35.4 28.2 18.0X +Native ORC Vectorized 6970 / 7169 2.3 443.2 1.2X +Native ORC Vectorized (Pushdown) 7447 / 7503 2.1 473.5 1.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz StringStartsWith filter: (value like '786432%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 8290 / 8510 1.9 527.0 1.0X -Parquet Vectorized (Pushdown) 260 / 272 60.5 16.5 31.9X -Native ORC Vectorized 7361 / 7395 2.1 468.0 1.1X -Native ORC Vectorized (Pushdown) 7694 / 7811 2.0 489.2 1.1X +Parquet 
Vectorized 7908 / 8046 2.0 502.8 1.0X +Parquet Vectorized (Pushdown) 408 / 429 38.6 25.9 19.4X +Native ORC Vectorized 7021 / 7100 2.2 446.4 1.1X +Native ORC Vectorized (Pushdown) 7310 / 7490 2.2 464.8 1.1X + + +================================================================================================ +Pushdown benchmark for decimal +================================================================================================ -###############################[ Pushdown benchmark for decimal ]################################ Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 decimal(9, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 3621 / 3686 4.3 230.2 1.0X -Parquet Vectorized (Pushdown) 3616 / 3666 4.3 229.9 1.0X -Native ORC Vectorized 3976 / 4067 4.0 252.8 0.9X -Native ORC Vectorized (Pushdown) 623 / 643 25.3 39.6 5.8X +Parquet Vectorized 3785 / 3867 4.2 240.6 1.0X +Parquet Vectorized (Pushdown) 3820 / 3928 4.1 242.9 1.0X +Native ORC Vectorized 3981 / 4049 4.0 253.1 1.0X +Native ORC Vectorized (Pushdown) 702 / 735 22.4 44.6 5.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 10% decimal(9, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 4510 / 4640 3.5 286.7 1.0X -Parquet Vectorized (Pushdown) 4495 / 4575 3.5 285.8 1.0X -Native ORC Vectorized 4854 / 4946 3.2 308.6 0.9X -Native ORC Vectorized (Pushdown) 1889 / 1988 8.3 120.1 2.4X +Parquet Vectorized 4694 / 4813 3.4 298.4 1.0X +Parquet Vectorized (Pushdown) 4839 / 4907 3.3 307.6 1.0X +Native ORC Vectorized 4943 / 5032 3.2 314.2 0.9X +Native ORC Vectorized (Pushdown) 2043 / 2085 7.7 129.9 
2.3X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 50% decimal(9, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7918 / 8018 2.0 503.4 1.0X -Parquet Vectorized (Pushdown) 7914 / 8027 2.0 503.2 1.0X -Native ORC Vectorized 8342 / 8512 1.9 530.3 0.9X -Native ORC Vectorized (Pushdown) 7224 / 7320 2.2 459.3 1.1X +Parquet Vectorized 8321 / 8472 1.9 529.0 1.0X +Parquet Vectorized (Pushdown) 8125 / 8471 1.9 516.6 1.0X +Native ORC Vectorized 8524 / 8616 1.8 541.9 1.0X +Native ORC Vectorized (Pushdown) 7961 / 8383 2.0 506.1 1.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 90% decimal(9, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 9301 / 9348 1.7 591.3 1.0X -Parquet Vectorized (Pushdown) 9239 / 9338 1.7 587.4 1.0X -Native ORC Vectorized 9741 / 9901 1.6 619.3 1.0X -Native ORC Vectorized (Pushdown) 9146 / 9335 1.7 581.5 1.0X +Parquet Vectorized 9587 / 10112 1.6 609.5 1.0X +Parquet Vectorized (Pushdown) 9726 / 10370 1.6 618.3 1.0X +Native ORC Vectorized 10119 / 11147 1.6 643.4 0.9X +Native ORC Vectorized (Pushdown) 9366 / 9497 1.7 595.5 1.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 decimal(18, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 3893 / 3947 4.0 247.5 1.0X -Parquet Vectorized (Pushdown) 3898 / 3936 4.0 247.8 1.0X -Native ORC Vectorized 4728 / 4808 3.3 300.6 0.8X -Native ORC Vectorized (Pushdown) 777 / 810 20.2 49.4 
5.0X +Parquet Vectorized 4060 / 4093 3.9 258.1 1.0X +Parquet Vectorized (Pushdown) 4037 / 4125 3.9 256.6 1.0X +Native ORC Vectorized 4756 / 4811 3.3 302.4 0.9X +Native ORC Vectorized (Pushdown) 824 / 889 19.1 52.4 4.9X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 10% decimal(18, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 4835 / 4857 3.3 307.4 1.0X -Parquet Vectorized (Pushdown) 4788 / 4863 3.3 304.4 1.0X -Native ORC Vectorized 5551 / 5664 2.8 352.9 0.9X -Native ORC Vectorized (Pushdown) 2074 / 2134 7.6 131.9 2.3X +Parquet Vectorized 5157 / 5271 3.0 327.9 1.0X +Parquet Vectorized (Pushdown) 5051 / 5141 3.1 321.1 1.0X +Native ORC Vectorized 5723 / 6146 2.7 363.9 0.9X +Native ORC Vectorized (Pushdown) 2198 / 2317 7.2 139.8 2.3X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 50% decimal(18, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 8243 / 8317 1.9 524.1 1.0X -Parquet Vectorized (Pushdown) 8277 / 8314 1.9 526.3 1.0X -Native ORC Vectorized 9190 / 9335 1.7 584.3 0.9X -Native ORC Vectorized (Pushdown) 7426 / 7507 2.1 472.1 1.1X +Parquet Vectorized 8608 / 8647 1.8 547.3 1.0X +Parquet Vectorized (Pushdown) 8471 / 8584 1.9 538.6 1.0X +Native ORC Vectorized 9249 / 10048 1.7 588.0 0.9X +Native ORC Vectorized (Pushdown) 7645 / 8091 2.1 486.1 1.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 90% decimal(18, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------ -Parquet Vectorized 11441 / 11561 1.4 727.4 1.0X -Parquet Vectorized (Pushdown) 11386 / 11666 1.4 723.9 1.0X -Native ORC Vectorized 12756 / 12920 1.2 811.0 0.9X -Native ORC Vectorized (Pushdown) 12638 / 12817 1.2 803.5 0.9X +Parquet Vectorized 11658 / 11888 1.3 741.2 1.0X +Parquet Vectorized (Pushdown) 11812 / 12098 1.3 751.0 1.0X +Native ORC Vectorized 12943 / 13312 1.2 822.9 0.9X +Native ORC Vectorized (Pushdown) 13139 / 13465 1.2 835.4 0.9X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 decimal(38, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 5344 / 5393 2.9 339.8 1.0X -Parquet Vectorized (Pushdown) 5239 / 5406 3.0 333.1 1.0X -Native ORC Vectorized 4675 / 4742 3.4 297.2 1.1X -Native ORC Vectorized (Pushdown) 801 / 836 19.6 50.9 6.7X +Parquet Vectorized 5491 / 5716 2.9 349.1 1.0X +Parquet Vectorized (Pushdown) 5515 / 5615 2.9 350.6 1.0X +Native ORC Vectorized 4582 / 4654 3.4 291.3 1.2X +Native ORC Vectorized (Pushdown) 815 / 861 19.3 51.8 6.7X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 10% decimal(38, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 6424 / 6507 2.4 408.4 1.0X -Parquet Vectorized (Pushdown) 6364 / 6465 2.5 404.6 1.0X -Native ORC Vectorized 5744 / 5847 2.7 365.2 1.1X -Native ORC Vectorized (Pushdown) 2221 / 2249 7.1 141.2 2.9X +Parquet Vectorized 6432 / 6527 2.4 409.0 1.0X +Parquet Vectorized (Pushdown) 6513 / 6607 2.4 414.1 1.0X +Native ORC Vectorized 5618 / 6085 2.8 357.2 1.1X +Native ORC Vectorized (Pushdown) 2403 / 2443 
6.5 152.8 2.7X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 50% decimal(38, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 10524 / 10642 1.5 669.1 1.0X -Parquet Vectorized (Pushdown) 10510 / 10634 1.5 668.2 1.0X -Native ORC Vectorized 9796 / 9892 1.6 622.8 1.1X -Native ORC Vectorized (Pushdown) 8035 / 8160 2.0 510.9 1.3X +Parquet Vectorized 11041 / 11467 1.4 701.9 1.0X +Parquet Vectorized (Pushdown) 10909 / 11484 1.4 693.5 1.0X +Native ORC Vectorized 9860 / 10436 1.6 626.9 1.1X +Native ORC Vectorized (Pushdown) 7908 / 8069 2.0 502.8 1.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 90% decimal(38, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 14674 / 14807 1.1 933.0 1.0X -Parquet Vectorized (Pushdown) 14567 / 14703 1.1 926.1 1.0X -Native ORC Vectorized 13811 / 13906 1.1 878.1 1.1X -Native ORC Vectorized (Pushdown) 13785 / 13853 1.1 876.4 1.1X +Parquet Vectorized 14816 / 16877 1.1 942.0 1.0X +Parquet Vectorized (Pushdown) 15383 / 15740 1.0 978.0 1.0X +Native ORC Vectorized 14408 / 14771 1.1 916.0 1.0X +Native ORC Vectorized (Pushdown) 13968 / 14805 1.1 888.1 1.1X + + +================================================================================================ +Pushdown benchmark for InSet -> InFilters +================================================================================================ -##########################[ Pushdown benchmark for InSet -> InFilters ]########################## Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values 
count: 5, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7657 / 7918 2.1 486.8 1.0X -Parquet Vectorized (Pushdown) 7581 / 7658 2.1 482.0 1.0X -Native ORC Vectorized 7338 / 7682 2.1 466.6 1.0X -Native ORC Vectorized (Pushdown) 1345 / 1354 11.7 85.5 5.7X +Parquet Vectorized 7477 / 7587 2.1 475.4 1.0X +Parquet Vectorized (Pushdown) 7862 / 8346 2.0 499.9 1.0X +Native ORC Vectorized 6447 / 7021 2.4 409.9 1.2X +Native ORC Vectorized (Pushdown) 983 / 1003 16.0 62.5 7.6X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 5, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7647 / 7707 2.1 486.2 1.0X -Parquet Vectorized (Pushdown) 7600 / 7758 2.1 483.2 1.0X -Native ORC Vectorized 7376 / 8002 2.1 469.0 1.0X -Native ORC Vectorized (Pushdown) 1324 / 1359 11.9 84.2 5.8X +Parquet Vectorized 7107 / 7290 2.2 451.9 1.0X +Parquet Vectorized (Pushdown) 7196 / 7258 2.2 457.5 1.0X +Native ORC Vectorized 6102 / 6222 2.6 388.0 1.2X +Native ORC Vectorized (Pushdown) 926 / 958 17.0 58.9 7.7X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 5, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7587 / 7642 2.1 482.3 1.0X -Parquet Vectorized (Pushdown) 7570 / 7631 2.1 481.3 1.0X -Native ORC Vectorized 7440 / 8116 2.1 473.0 1.0X -Native ORC Vectorized (Pushdown) 1325 / 1338 11.9 84.2 5.7X +Parquet Vectorized 7374 / 7692 2.1 468.8 1.0X +Parquet Vectorized (Pushdown) 7771 / 7848 2.0 494.1 0.9X +Native ORC 
Vectorized 6184 / 6356 2.5 393.2 1.2X +Native ORC Vectorized (Pushdown) 920 / 963 17.1 58.5 8.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 10, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7613 / 8941 2.1 484.0 1.0X -Parquet Vectorized (Pushdown) 7657 / 7777 2.1 486.8 1.0X -Native ORC Vectorized 7422 / 8243 2.1 471.9 1.0X -Native ORC Vectorized (Pushdown) 1361 / 1463 11.6 86.6 5.6X +Parquet Vectorized 7073 / 7326 2.2 449.7 1.0X +Parquet Vectorized (Pushdown) 7304 / 7647 2.2 464.4 1.0X +Native ORC Vectorized 6222 / 6579 2.5 395.6 1.1X +Native ORC Vectorized (Pushdown) 958 / 994 16.4 60.9 7.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 10, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7643 / 7719 2.1 485.9 1.0X -Parquet Vectorized (Pushdown) 7598 / 7644 2.1 483.1 1.0X -Native ORC Vectorized 7311 / 7404 2.2 464.8 1.0X -Native ORC Vectorized (Pushdown) 1279 / 1313 12.3 81.3 6.0X +Parquet Vectorized 7121 / 7501 2.2 452.7 1.0X +Parquet Vectorized (Pushdown) 7751 / 8334 2.0 492.8 0.9X +Native ORC Vectorized 6225 / 6680 2.5 395.8 1.1X +Native ORC Vectorized (Pushdown) 998 / 1020 15.8 63.5 7.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 10, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7580 / 7649 2.1 482.0 1.0X -Parquet Vectorized (Pushdown) 7553 / 7634 2.1 480.2 
1.0X -Native ORC Vectorized 7477 / 7576 2.1 475.4 1.0X -Native ORC Vectorized (Pushdown) 1317 / 1361 11.9 83.7 5.8X +Parquet Vectorized 7157 / 7399 2.2 455.1 1.0X +Parquet Vectorized (Pushdown) 7806 / 7911 2.0 496.3 0.9X +Native ORC Vectorized 6548 / 6720 2.4 416.3 1.1X +Native ORC Vectorized (Pushdown) 1016 / 1050 15.5 64.6 7.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 50, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7783 / 7850 2.0 494.8 1.0X -Parquet Vectorized (Pushdown) 7866 / 7876 2.0 500.1 1.0X -Native ORC Vectorized 7633 / 7750 2.1 485.3 1.0X -Native ORC Vectorized (Pushdown) 1432 / 1457 11.0 91.0 5.4X +Parquet Vectorized 7662 / 7805 2.1 487.1 1.0X +Parquet Vectorized (Pushdown) 7590 / 7861 2.1 482.5 1.0X +Native ORC Vectorized 6840 / 8073 2.3 434.9 1.1X +Native ORC Vectorized (Pushdown) 1041 / 1075 15.1 66.2 7.4X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 50, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7785 / 7854 2.0 495.0 1.0X -Parquet Vectorized (Pushdown) 7860 / 7874 2.0 499.7 1.0X -Native ORC Vectorized 7665 / 7712 2.1 487.3 1.0X -Native ORC Vectorized (Pushdown) 1457 / 1479 10.8 92.6 5.3X +Parquet Vectorized 8230 / 9266 1.9 523.2 1.0X +Parquet Vectorized (Pushdown) 7735 / 7960 2.0 491.8 1.1X +Native ORC Vectorized 6945 / 7109 2.3 441.6 1.2X +Native ORC Vectorized (Pushdown) 1123 / 1144 14.0 71.4 7.3X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 50, distribution: 90): Best/Avg 
Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7868 / 7900 2.0 500.2 1.0X -Parquet Vectorized (Pushdown) 7832 / 7888 2.0 498.0 1.0X -Native ORC Vectorized 7678 / 7716 2.0 488.2 1.0X -Native ORC Vectorized (Pushdown) 1473 / 1540 10.7 93.6 5.3X +Parquet Vectorized 7656 / 8058 2.1 486.7 1.0X +Parquet Vectorized (Pushdown) 7860 / 8247 2.0 499.7 1.0X +Native ORC Vectorized 6684 / 7003 2.4 424.9 1.1X +Native ORC Vectorized (Pushdown) 1085 / 1172 14.5 69.0 7.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 100, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7739 / 7761 2.0 492.0 1.0X -Parquet Vectorized (Pushdown) 7733 / 7778 2.0 491.6 1.0X -Native ORC Vectorized 7549 / 7596 2.1 480.0 1.0X -Native ORC Vectorized (Pushdown) 1536 / 1544 10.2 97.7 5.0X +Parquet Vectorized 7594 / 8128 2.1 482.8 1.0X +Parquet Vectorized (Pushdown) 7845 / 7923 2.0 498.8 1.0X +Native ORC Vectorized 5859 / 6421 2.7 372.5 1.3X +Native ORC Vectorized (Pushdown) 1037 / 1054 15.2 66.0 7.3X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 100, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7794 / 7806 2.0 495.5 1.0X -Parquet Vectorized (Pushdown) 7799 / 7880 2.0 495.8 1.0X -Native ORC Vectorized 7576 / 7599 2.1 481.7 1.0X -Native ORC Vectorized (Pushdown) 1554 / 1576 10.1 98.8 5.0X +Parquet Vectorized 6762 / 6775 2.3 429.9 1.0X +Parquet Vectorized (Pushdown) 6911 / 6970 2.3 439.4 1.0X +Native ORC Vectorized 5884 / 5960 2.7 374.1 1.1X 
+Native ORC Vectorized (Pushdown) 1028 / 1052 15.3 65.4 6.6X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz InSet -> InFilters (values count: 100, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 7778 / 7832 2.0 494.5 1.0X -Parquet Vectorized (Pushdown) 7772 / 7837 2.0 494.1 1.0X -Native ORC Vectorized 7615 / 7647 2.1 484.1 1.0X -Native ORC Vectorized (Pushdown) 1541 / 1577 10.2 98.0 5.0X +Parquet Vectorized 6718 / 6767 2.3 427.1 1.0X +Parquet Vectorized (Pushdown) 6812 / 6909 2.3 433.1 1.0X +Native ORC Vectorized 5842 / 5883 2.7 371.4 1.1X +Native ORC Vectorized (Pushdown) 1040 / 1058 15.1 66.1 6.5X + + +================================================================================================ +Pushdown benchmark for tinyint +================================================================================================ -###############################[ Pushdown benchmark for tinyint ]################################ Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz Select 1 tinyint row (value = CAST(63 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 4477 / 4626 3.5 284.6 1.0X -Parquet Vectorized (Pushdown) 4329 / 4366 3.6 275.2 1.0X -Native ORC Vectorized 3584 / 3679 4.4 227.9 1.2X -Native ORC Vectorized (Pushdown) 641 / 986 24.5 40.8 7.0X +Parquet Vectorized 3726 / 3775 4.2 236.9 1.0X +Parquet Vectorized (Pushdown) 3741 / 3789 4.2 237.9 1.0X +Native ORC Vectorized 2793 / 2909 5.6 177.6 1.3X +Native ORC Vectorized (Pushdown) 530 / 561 29.7 33.7 7.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -Select 10% 
tinyint rows (value < CAST(12 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 5007 / 5511 3.1 318.3 1.0X -Parquet Vectorized (Pushdown) 5093 / 5159 3.1 323.8 1.0X -Native ORC Vectorized 4388 / 4844 3.6 279.0 1.1X -Native ORC Vectorized (Pushdown) 1753 / 2271 9.0 111.5 2.9X +Parquet Vectorized 4385 / 4406 3.6 278.8 1.0X +Parquet Vectorized (Pushdown) 4398 / 4454 3.6 279.6 1.0X +Native ORC Vectorized 3420 / 3501 4.6 217.4 1.3X +Native ORC Vectorized (Pushdown) 1395 / 1432 11.3 88.7 3.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Parquet Vectorized 8766 / 10085 1.8 557.3 1.0X -Parquet Vectorized (Pushdown) 9732 / 11845 1.6 618.7 0.9X -Native ORC Vectorized 7782 / 10141 2.0 494.8 1.1X -Native ORC Vectorized (Pushdown) 6643 / 10009 2.4 422.4 1.3X +Parquet Vectorized 7307 / 7394 2.2 464.6 1.0X +Parquet Vectorized (Pushdown) 7411 / 7461 2.1 471.2 1.0X +Native ORC Vectorized 6501 / 7814 2.4 413.4 1.1X +Native ORC Vectorized (Pushdown) 7341 / 8637 2.1 466.7 1.0X Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz -Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ 
-Parquet Vectorized 12071 / 16726 1.3 767.4 1.0X -Parquet Vectorized (Pushdown) 11776 / 12005 1.3 748.7 1.0X -Native ORC Vectorized 13914 / 17542 1.1 884.6 0.9X -Native ORC Vectorized (Pushdown) 11173 / 12826 1.4 710.4 1.1X +Parquet Vectorized 11886 / 13122 1.3 755.7 1.0X +Parquet Vectorized (Pushdown) 12557 / 14173 1.3 798.4 0.9X +Native ORC Vectorized 10758 / 11971 1.5 684.0 1.1X +Native ORC Vectorized (Pushdown) 10564 / 10713 1.5 671.6 1.1X + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index c48182d2e51e..fc716dec9f33 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -65,15 +65,22 @@ class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfter override def beforeEach(td: TestData) { super.beforeEach(td) - val testName = "[ " + td.name + " ]" - val halfLength = (97 - testName.length) / 2 - val testHeader = (("".padTo(halfLength, '#') + testName).padTo(97, '#') + "\n").getBytes + val separator = "=" * 96 + val testHeader = (separator + '\n' + td.name + '\n' + separator + '\n' + '\n').getBytes out.write(testHeader) } + override def afterEach(td: TestData) { + out.write('\n') + super.afterEach(td) + } + override def afterAll() { - super.afterAll() - out.close() + try { + out.close() + } finally { + super.afterAll() + } } def withTempPath(f: File => Unit): Unit = {