diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt index e680ddff53dd..d93a4e94771d 100644 --- a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt +++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt @@ -2,669 +2,698 @@ Pushdown for many distinct value case ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 0 string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 11405 / 11485 1.4 725.1 1.0X -Parquet Vectorized (Pushdown) 675 / 690 23.3 42.9 16.9X -Native ORC Vectorized 7127 / 7170 2.2 453.1 1.6X -Native ORC Vectorized (Pushdown) 519 / 541 30.3 33.0 22.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 0 string row ('7864320' < value < '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 11457 / 11473 1.4 728.4 1.0X -Parquet Vectorized (Pushdown) 656 / 686 24.0 41.7 17.5X -Native ORC Vectorized 7328 / 7342 2.1 465.9 1.6X -Native ORC Vectorized (Pushdown) 539 / 565 29.2 34.2 21.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 string row (value = '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 11878 / 11888 1.3 755.2 1.0X -Parquet Vectorized (Pushdown) 630 / 654 25.0 40.1 18.9X -Native ORC Vectorized 7342 / 7362 2.1 466.8 1.6X -Native ORC Vectorized 
(Pushdown) 519 / 537 30.3 33.0 22.9X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 string row (value <=> '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 11423 / 11440 1.4 726.2 1.0X -Parquet Vectorized (Pushdown) 625 / 643 25.2 39.7 18.3X -Native ORC Vectorized 7315 / 7335 2.2 465.1 1.6X -Native ORC Vectorized (Pushdown) 507 / 520 31.0 32.2 22.5X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 string row ('7864320' <= value <= '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 11440 / 11478 1.4 727.3 1.0X -Parquet Vectorized (Pushdown) 634 / 652 24.8 40.3 18.0X -Native ORC Vectorized 7311 / 7324 2.2 464.8 1.6X -Native ORC Vectorized (Pushdown) 517 / 548 30.4 32.8 22.1X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select all string rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 20750 / 20872 0.8 1319.3 1.0X -Parquet Vectorized (Pushdown) 21002 / 21032 0.7 1335.3 1.0X -Native ORC Vectorized 16714 / 16742 0.9 1062.6 1.2X -Native ORC Vectorized (Pushdown) 16926 / 16965 0.9 1076.1 1.2X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 0 int row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10510 / 10532 1.5 668.2 1.0X 
-Parquet Vectorized (Pushdown) 642 / 665 24.5 40.8 16.4X -Native ORC Vectorized 6609 / 6618 2.4 420.2 1.6X -Native ORC Vectorized (Pushdown) 502 / 512 31.4 31.9 21.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 0 int row (7864320 < value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10505 / 10514 1.5 667.9 1.0X -Parquet Vectorized (Pushdown) 659 / 673 23.9 41.9 15.9X -Native ORC Vectorized 6634 / 6641 2.4 421.8 1.6X -Native ORC Vectorized (Pushdown) 513 / 526 30.7 32.6 20.5X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 int row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10555 / 10570 1.5 671.1 1.0X -Parquet Vectorized (Pushdown) 651 / 668 24.2 41.4 16.2X -Native ORC Vectorized 6721 / 6728 2.3 427.3 1.6X -Native ORC Vectorized (Pushdown) 508 / 519 31.0 32.3 20.8X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 int row (value <=> 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10556 / 10566 1.5 671.1 1.0X -Parquet Vectorized (Pushdown) 647 / 654 24.3 41.1 16.3X -Native ORC Vectorized 6716 / 6728 2.3 427.0 1.6X -Native ORC Vectorized (Pushdown) 510 / 521 30.9 32.4 20.7X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 int row (7864320 <= value <= 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------- -Parquet Vectorized 10556 / 10565 1.5 671.1 1.0X -Parquet Vectorized (Pushdown) 649 / 654 24.2 41.3 16.3X -Native ORC Vectorized 6700 / 6712 2.3 426.0 1.6X -Native ORC Vectorized (Pushdown) 509 / 520 30.9 32.3 20.8X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 int row (7864319 < value < 7864321): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10547 / 10566 1.5 670.5 1.0X -Parquet Vectorized (Pushdown) 649 / 653 24.2 41.3 16.3X -Native ORC Vectorized 6703 / 6713 2.3 426.2 1.6X -Native ORC Vectorized (Pushdown) 510 / 520 30.8 32.5 20.7X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 10% int rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 11478 / 11525 1.4 729.7 1.0X -Parquet Vectorized (Pushdown) 2576 / 2587 6.1 163.8 4.5X -Native ORC Vectorized 7633 / 7657 2.1 485.3 1.5X -Native ORC Vectorized (Pushdown) 2076 / 2096 7.6 132.0 5.5X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 50% int rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 14785 / 14802 1.1 940.0 1.0X -Parquet Vectorized (Pushdown) 9971 / 9977 1.6 633.9 1.5X -Native ORC Vectorized 11082 / 11107 1.4 704.6 1.3X -Native ORC Vectorized (Pushdown) 8061 / 8073 2.0 512.5 1.8X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU 
E5-2670 v2 @ 2.50GHz -Select 90% int rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 18174 / 18214 0.9 1155.5 1.0X -Parquet Vectorized (Pushdown) 17387 / 17403 0.9 1105.5 1.0X -Native ORC Vectorized 14465 / 14492 1.1 919.7 1.3X -Native ORC Vectorized (Pushdown) 14024 / 14041 1.1 891.6 1.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select all int rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 19004 / 19014 0.8 1208.2 1.0X -Parquet Vectorized (Pushdown) 19219 / 19232 0.8 1221.9 1.0X -Native ORC Vectorized 15266 / 15290 1.0 970.6 1.2X -Native ORC Vectorized (Pushdown) 15469 / 15482 1.0 983.5 1.2X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select all int rows (value > -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 19036 / 19052 0.8 1210.3 1.0X -Parquet Vectorized (Pushdown) 19287 / 19306 0.8 1226.2 1.0X -Native ORC Vectorized 15311 / 15371 1.0 973.5 1.2X -Native ORC Vectorized (Pushdown) 15517 / 15590 1.0 986.5 1.2X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select all int rows (value != -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 19072 / 19102 0.8 1212.6 1.0X -Parquet Vectorized (Pushdown) 19288 / 19318 0.8 1226.3 1.0X -Native ORC Vectorized 15277 / 15293 1.0 971.3 1.2X -Native ORC Vectorized (Pushdown) 
15479 / 15499 1.0 984.1 1.2X +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 0 string row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8100 8441 338 1.9 515.0 1.0X +Parquet Vectorized (Pushdown) 508 554 30 31.0 32.3 16.0X +Native ORC Vectorized 5747 5950 195 2.7 365.4 1.4X +Native ORC Vectorized (Pushdown) 585 699 76 26.9 37.2 13.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 0 string row ('7864320' < value < '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8001 8183 142 2.0 508.7 1.0X +Parquet Vectorized (Pushdown) 498 536 37 31.6 31.7 16.1X +Native ORC Vectorized 5907 6064 160 2.7 375.6 1.4X +Native ORC Vectorized (Pushdown) 542 593 34 29.0 34.5 14.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 string row (value = '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7999 8356 247 2.0 508.6 1.0X +Parquet Vectorized (Pushdown) 471 484 11 33.4 30.0 17.0X +Native ORC Vectorized 5856 5921 45 2.7 372.3 1.4X +Native ORC Vectorized (Pushdown) 531 541 17 29.6 33.7 15.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 string row (value <=> '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8061 8398 451 2.0 512.5 1.0X +Parquet Vectorized (Pushdown) 462 479 17 34.1 29.4 17.5X +Native ORC Vectorized 5778 5843 86 2.7 367.4 1.4X +Native ORC Vectorized (Pushdown) 620 684 39 25.4 39.4 13.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 string row ('7864320' <= value <= '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8056 8216 163 2.0 512.2 1.0X +Parquet Vectorized (Pushdown) 465 488 19 33.8 29.6 17.3X +Native ORC Vectorized 5793 5844 68 2.7 368.3 1.4X +Native ORC Vectorized (Pushdown) 538 579 26 29.2 34.2 15.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select all string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 14531 14600 52 1.1 923.8 1.0X +Parquet Vectorized (Pushdown) 15076 15285 189 1.0 958.5 1.0X +Native ORC Vectorized 12828 13009 233 1.2 815.6 1.1X +Native ORC Vectorized (Pushdown) 13072 13253 201 1.2 831.1 1.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 0 int row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7431 7880 418 2.1 472.4 1.0X +Parquet Vectorized (Pushdown) 453 456 2 34.7 28.8 16.4X +Native ORC Vectorized 5157 5669 447 3.0 327.9 1.4X 
+Native ORC Vectorized (Pushdown) 559 572 14 28.1 35.5 13.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 0 int row (7864320 < value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7441 7552 143 2.1 473.1 1.0X +Parquet Vectorized (Pushdown) 466 470 5 33.8 29.6 16.0X +Native ORC Vectorized 5238 5307 51 3.0 333.0 1.4X +Native ORC Vectorized (Pushdown) 557 571 15 28.3 35.4 13.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 int row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7447 7502 33 2.1 473.5 1.0X +Parquet Vectorized (Pushdown) 460 465 6 34.2 29.2 16.2X +Native ORC Vectorized 5212 5320 88 3.0 331.4 1.4X +Native ORC Vectorized (Pushdown) 647 764 120 24.3 41.1 11.5X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 int row (value <=> 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7573 7877 429 2.1 481.5 1.0X +Parquet Vectorized (Pushdown) 468 474 4 33.6 29.8 16.2X +Native ORC Vectorized 5338 5471 122 2.9 339.4 1.4X +Native ORC Vectorized (Pushdown) 560 595 25 28.1 35.6 13.5X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 int row (7864320 <= value <= 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7589 7680 102 2.1 482.5 1.0X +Parquet Vectorized (Pushdown) 471 472 2 33.4 29.9 16.1X +Native ORC Vectorized 5380 5764 344 2.9 342.0 1.4X +Native ORC Vectorized (Pushdown) 576 594 16 27.3 36.6 13.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 int row (7864319 < value < 7864321): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7540 7585 32 2.1 479.4 1.0X +Parquet Vectorized (Pushdown) 469 471 2 33.5 29.8 16.1X +Native ORC Vectorized 5437 5499 73 2.9 345.7 1.4X +Native ORC Vectorized (Pushdown) 653 705 34 24.1 41.5 11.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 10% int rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8989 9867 981 1.7 571.5 1.0X +Parquet Vectorized (Pushdown) 2197 2224 17 7.2 139.7 4.1X +Native ORC Vectorized 6612 7005 454 2.4 420.4 1.4X +Native ORC Vectorized (Pushdown) 2060 2136 66 7.6 131.0 4.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 50% int rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 11024 11815 959 1.4 700.9 1.0X +Parquet Vectorized (Pushdown) 7368 7556 325 2.1 468.4 1.5X +Native ORC Vectorized 9328 9436 120 1.7 593.0 1.2X +Native ORC 
Vectorized (Pushdown) 6613 6662 35 2.4 420.4 1.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 90% int rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 13358 13383 16 1.2 849.3 1.0X +Parquet Vectorized (Pushdown) 12788 13091 336 1.2 813.0 1.0X +Native ORC Vectorized 11443 12081 574 1.4 727.5 1.2X +Native ORC Vectorized (Pushdown) 11158 11180 16 1.4 709.4 1.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select all int rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 14090 14979 693 1.1 895.8 1.0X +Parquet Vectorized (Pushdown) 14540 15283 474 1.1 924.5 1.0X +Native ORC Vectorized 12602 12754 280 1.2 801.2 1.1X +Native ORC Vectorized (Pushdown) 12633 13019 367 1.2 803.2 1.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select all int rows (value > -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 14068 14383 217 1.1 894.4 1.0X +Parquet Vectorized (Pushdown) 14228 15038 781 1.1 904.6 1.0X +Native ORC Vectorized 12845 13208 413 1.2 816.7 1.1X +Native ORC Vectorized (Pushdown) 12985 13223 190 1.2 825.6 1.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select all int rows (value != -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 14607 15004 284 1.1 928.7 1.0X +Parquet Vectorized (Pushdown) 15216 15625 670 1.0 967.4 1.0X +Native ORC Vectorized 12779 13432 814 1.2 812.5 1.1X +Native ORC Vectorized (Pushdown) 13001 13294 232 1.2 826.6 1.1X ================================================================================================ Pushdown for few distinct value case (use dictionary encoding) ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 0 distinct string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10250 / 10274 1.5 651.7 1.0X -Parquet Vectorized (Pushdown) 571 / 576 27.5 36.3 17.9X -Native ORC Vectorized 8651 / 8660 1.8 550.0 1.2X -Native ORC Vectorized (Pushdown) 909 / 933 17.3 57.8 11.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 0 distinct string row ('100' < value < '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10420 / 10426 1.5 662.5 1.0X -Parquet Vectorized (Pushdown) 574 / 579 27.4 36.5 18.2X -Native ORC Vectorized 8973 / 8982 1.8 570.5 1.2X -Native ORC Vectorized (Pushdown) 916 / 955 17.2 58.2 11.4X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 distinct string row (value = '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet 
Vectorized 10428 / 10441 1.5 663.0 1.0X -Parquet Vectorized (Pushdown) 789 / 809 19.9 50.2 13.2X -Native ORC Vectorized 9042 / 9055 1.7 574.9 1.2X -Native ORC Vectorized (Pushdown) 1130 / 1145 13.9 71.8 9.2X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 distinct string row (value <=> '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10402 / 10416 1.5 661.3 1.0X -Parquet Vectorized (Pushdown) 791 / 806 19.9 50.3 13.2X -Native ORC Vectorized 9042 / 9055 1.7 574.9 1.2X -Native ORC Vectorized (Pushdown) 1112 / 1145 14.1 70.7 9.4X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 distinct string row ('100' <= value <= '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10548 / 10563 1.5 670.6 1.0X -Parquet Vectorized (Pushdown) 790 / 796 19.9 50.2 13.4X -Native ORC Vectorized 9144 / 9153 1.7 581.3 1.2X -Native ORC Vectorized (Pushdown) 1117 / 1148 14.1 71.0 9.4X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select all distinct string rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 20445 / 20469 0.8 1299.8 1.0X -Parquet Vectorized (Pushdown) 20686 / 20699 0.8 1315.2 1.0X -Native ORC Vectorized 18851 / 18953 0.8 1198.5 1.1X -Native ORC Vectorized (Pushdown) 19255 / 19268 0.8 1224.2 1.1X +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 0 distinct string row (value IS NULL): Best 
Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7446 7615 210 2.1 473.4 1.0X +Parquet Vectorized (Pushdown) 422 458 73 37.3 26.8 17.7X +Native ORC Vectorized 6842 7387 585 2.3 435.0 1.1X +Native ORC Vectorized (Pushdown) 1313 1437 145 12.0 83.5 5.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 0 distinct string row ('100' < value < '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7669 8303 494 2.1 487.6 1.0X +Parquet Vectorized (Pushdown) 426 434 9 36.9 27.1 18.0X +Native ORC Vectorized 7791 8283 318 2.0 495.3 1.0X +Native ORC Vectorized (Pushdown) 1368 1389 21 11.5 87.0 5.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 distinct string row (value = '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7680 7743 97 2.0 488.3 1.0X +Parquet Vectorized (Pushdown) 595 605 10 26.4 37.8 12.9X +Native ORC Vectorized 7314 7785 320 2.2 465.0 1.1X +Native ORC Vectorized (Pushdown) 1646 1847 161 9.6 104.6 4.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 distinct string row (value <=> '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8254 8356 111 1.9 524.7 1.0X +Parquet Vectorized (Pushdown) 640 649 
9 24.6 40.7 12.9X +Native ORC Vectorized 7206 7780 473 2.2 458.2 1.1X +Native ORC Vectorized (Pushdown) 1706 1795 57 9.2 108.5 4.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 distinct string row ('100' <= value <= '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8482 9206 752 1.9 539.3 1.0X +Parquet Vectorized (Pushdown) 623 669 31 25.2 39.6 13.6X +Native ORC Vectorized 7762 7962 272 2.0 493.5 1.1X +Native ORC Vectorized (Pushdown) 1523 1578 36 10.3 96.8 5.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select all distinct string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 16069 16332 214 1.0 1021.7 1.0X +Parquet Vectorized (Pushdown) 16387 16448 57 1.0 1041.8 1.0X +Native ORC Vectorized 15257 15783 813 1.0 970.0 1.1X +Native ORC Vectorized (Pushdown) 17282 18955 NaN 0.9 1098.7 0.9X ================================================================================================ Pushdown benchmark for StringStartsWith ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -StringStartsWith filter: (value like '10%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 14265 / 15213 1.1 907.0 1.0X -Parquet Vectorized (Pushdown) 4228 / 4870 3.7 268.8 3.4X -Native ORC Vectorized 10116 / 10977 1.6 
643.2 1.4X -Native ORC Vectorized (Pushdown) 10653 / 11376 1.5 677.3 1.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -StringStartsWith filter: (value like '1000%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 11499 / 11539 1.4 731.1 1.0X -Parquet Vectorized (Pushdown) 669 / 672 23.5 42.5 17.2X -Native ORC Vectorized 7343 / 7363 2.1 466.8 1.6X -Native ORC Vectorized (Pushdown) 7559 / 7568 2.1 480.6 1.5X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -StringStartsWith filter: (value like '786432%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 11463 / 11468 1.4 728.8 1.0X -Parquet Vectorized (Pushdown) 647 / 651 24.3 41.1 17.7X -Native ORC Vectorized 7322 / 7338 2.1 465.5 1.6X -Native ORC Vectorized (Pushdown) 7533 / 7544 2.1 478.9 1.5X +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +StringStartsWith filter: (value like '10%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 10929 11386 525 1.4 694.8 1.0X +Parquet Vectorized (Pushdown) 3649 3934 254 4.3 232.0 3.0X +Native ORC Vectorized 8790 9574 1093 1.8 558.8 1.2X +Native ORC Vectorized (Pushdown) 8094 9051 1366 1.9 514.6 1.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +StringStartsWith filter: (value like '1000%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8090 8297 283 1.9 514.4 1.0X +Parquet Vectorized (Pushdown) 472 475 4 33.3 30.0 17.1X +Native ORC Vectorized 6023 6123 78 2.6 383.0 1.3X +Native ORC Vectorized (Pushdown) 6291 6696 352 2.5 400.0 1.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +StringStartsWith filter: (value like '786432%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8210 8447 245 1.9 522.0 1.0X +Parquet Vectorized (Pushdown) 463 466 4 34.0 29.4 17.7X +Native ORC Vectorized 5822 5867 28 2.7 370.1 1.4X +Native ORC Vectorized (Pushdown) 6122 6146 15 2.6 389.2 1.3X ================================================================================================ Pushdown benchmark for decimal ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 decimal(9, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 5543 / 5564 2.8 352.4 1.0X -Parquet Vectorized (Pushdown) 168 / 174 93.7 10.7 33.0X -Native ORC Vectorized 4992 / 5052 3.2 317.4 1.1X -Native ORC Vectorized (Pushdown) 840 / 850 18.7 53.4 6.6X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 10% decimal(9, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet 
Vectorized 7312 / 7358 2.2 464.9 1.0X -Parquet Vectorized (Pushdown) 3008 / 3078 5.2 191.2 2.4X -Native ORC Vectorized 6775 / 6798 2.3 430.7 1.1X -Native ORC Vectorized (Pushdown) 6819 / 6832 2.3 433.5 1.1X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 50% decimal(9, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 13232 / 13241 1.2 841.3 1.0X -Parquet Vectorized (Pushdown) 12555 / 12569 1.3 798.2 1.1X -Native ORC Vectorized 12597 / 12627 1.2 800.9 1.1X -Native ORC Vectorized (Pushdown) 12677 / 12711 1.2 806.0 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 90% decimal(9, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 14725 / 14729 1.1 936.2 1.0X -Parquet Vectorized (Pushdown) 14781 / 14800 1.1 939.7 1.0X -Native ORC Vectorized 15360 / 15453 1.0 976.5 1.0X -Native ORC Vectorized (Pushdown) 15444 / 15466 1.0 981.9 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 decimal(18, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 5746 / 5763 2.7 365.3 1.0X -Parquet Vectorized (Pushdown) 166 / 169 94.8 10.6 34.6X -Native ORC Vectorized 5007 / 5023 3.1 318.3 1.1X -Native ORC Vectorized (Pushdown) 2629 / 2640 6.0 167.1 2.2X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 10% decimal(18, 2) rows (value < 1572864): Best/Avg 
Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 6827 / 6864 2.3 434.0 1.0X -Parquet Vectorized (Pushdown) 1809 / 1827 8.7 115.0 3.8X -Native ORC Vectorized 6287 / 6296 2.5 399.7 1.1X -Native ORC Vectorized (Pushdown) 6364 / 6377 2.5 404.6 1.1X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 50% decimal(18, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 11315 / 11342 1.4 719.4 1.0X -Parquet Vectorized (Pushdown) 8431 / 8450 1.9 536.0 1.3X -Native ORC Vectorized 11591 / 11611 1.4 736.9 1.0X -Native ORC Vectorized (Pushdown) 11424 / 11475 1.4 726.3 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 90% decimal(18, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 15703 / 15712 1.0 998.4 1.0X -Parquet Vectorized (Pushdown) 14982 / 15009 1.0 952.5 1.0X -Native ORC Vectorized 16887 / 16955 0.9 1073.7 0.9X -Native ORC Vectorized (Pushdown) 16518 / 16530 1.0 1050.2 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 decimal(38, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 8101 / 8130 1.9 515.1 1.0X -Parquet Vectorized (Pushdown) 184 / 187 85.6 11.7 44.1X -Native ORC Vectorized 4998 / 5027 3.1 317.8 1.6X -Native ORC Vectorized (Pushdown) 165 / 168 95.6 10.5 49.2X - -OpenJDK 64-Bit Server VM 
1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 10% decimal(38, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 9405 / 9447 1.7 597.9 1.0X -Parquet Vectorized (Pushdown) 2269 / 2275 6.9 144.2 4.1X -Native ORC Vectorized 6167 / 6203 2.6 392.1 1.5X -Native ORC Vectorized (Pushdown) 1783 / 1787 8.8 113.3 5.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 50% decimal(38, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 14700 / 14707 1.1 934.6 1.0X -Parquet Vectorized (Pushdown) 10699 / 10712 1.5 680.2 1.4X -Native ORC Vectorized 10687 / 10703 1.5 679.5 1.4X -Native ORC Vectorized (Pushdown) 8364 / 8415 1.9 531.8 1.8X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 90% decimal(38, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 19780 / 19894 0.8 1257.6 1.0X -Parquet Vectorized (Pushdown) 19003 / 19025 0.8 1208.1 1.0X -Native ORC Vectorized 15385 / 15404 1.0 978.2 1.3X -Native ORC Vectorized (Pushdown) 15032 / 15060 1.0 955.7 1.3X +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 decimal(9, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 3863 4034 224 4.1 245.6 1.0X +Parquet 
Vectorized (Pushdown) 127 139 12 123.4 8.1 30.3X +Native ORC Vectorized 3905 4028 83 4.0 248.3 1.0X +Native ORC Vectorized (Pushdown) 742 776 27 21.2 47.2 5.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 10% decimal(9, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 5416 5640 149 2.9 344.3 1.0X +Parquet Vectorized (Pushdown) 2444 2492 50 6.4 155.4 2.2X +Native ORC Vectorized 5364 5392 40 2.9 341.1 1.0X +Native ORC Vectorized (Pushdown) 5442 5487 56 2.9 346.0 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 50% decimal(9, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 10072 10157 76 1.6 640.4 1.0X +Parquet Vectorized (Pushdown) 9638 9889 184 1.6 612.7 1.0X +Native ORC Vectorized 10192 10224 33 1.5 648.0 1.0X +Native ORC Vectorized (Pushdown) 10287 10381 173 1.5 654.1 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 90% decimal(9, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 11307 11681 311 1.4 718.9 1.0X +Parquet Vectorized (Pushdown) 11409 11509 94 1.4 725.4 1.0X +Native ORC Vectorized 11613 12703 NaN 1.4 738.3 1.0X +Native ORC Vectorized (Pushdown) 11938 12334 525 1.3 759.0 0.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) 
i7-6920HQ CPU @ 2.90GHz +Select 1 decimal(18, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 4109 4344 168 3.8 261.2 1.0X +Parquet Vectorized (Pushdown) 134 150 13 117.7 8.5 30.7X +Native ORC Vectorized 3927 4163 192 4.0 249.7 1.0X +Native ORC Vectorized (Pushdown) 2159 2337 120 7.3 137.3 1.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 10% decimal(18, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 5049 5301 183 3.1 321.0 1.0X +Parquet Vectorized (Pushdown) 1379 1467 89 11.4 87.7 3.7X +Native ORC Vectorized 4727 5344 637 3.3 300.5 1.1X +Native ORC Vectorized (Pushdown) 4862 5101 216 3.2 309.1 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 50% decimal(18, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7877 8107 216 2.0 500.8 1.0X +Parquet Vectorized (Pushdown) 5771 5836 88 2.7 366.9 1.4X +Native ORC Vectorized 7684 7704 18 2.0 488.5 1.0X +Native ORC Vectorized (Pushdown) 7976 8248 262 2.0 507.1 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 90% decimal(18, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ 
+Parquet Vectorized 10875 11744 792 1.4 691.4 1.0X +Parquet Vectorized (Pushdown) 10535 11057 418 1.5 669.8 1.0X +Native ORC Vectorized 10825 11382 760 1.5 688.2 1.0X +Native ORC Vectorized (Pushdown) 11319 11950 701 1.4 719.6 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 decimal(38, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 5764 6018 262 2.7 366.5 1.0X +Parquet Vectorized (Pushdown) 138 148 8 114.0 8.8 41.8X +Native ORC Vectorized 4030 4068 42 3.9 256.2 1.4X +Native ORC Vectorized (Pushdown) 174 191 24 90.5 11.0 33.2X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 10% decimal(38, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 6790 7280 466 2.3 431.7 1.0X +Parquet Vectorized (Pushdown) 1677 1802 197 9.4 106.6 4.0X +Native ORC Vectorized 5077 5429 333 3.1 322.8 1.3X +Native ORC Vectorized (Pushdown) 1510 1645 213 10.4 96.0 4.5X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 50% decimal(38, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 10372 11031 543 1.5 659.4 1.0X +Parquet Vectorized (Pushdown) 7581 7959 330 2.1 482.0 1.4X +Native ORC Vectorized 8409 9048 680 1.9 534.6 1.2X +Native ORC Vectorized (Pushdown) 6551 6564 16 2.4 416.5 1.6X + +Java HotSpot(TM) 64-Bit Server VM 
1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 90% decimal(38, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 13819 14830 924 1.1 878.6 1.0X +Parquet Vectorized (Pushdown) 14227 14856 440 1.1 904.5 1.0X +Native ORC Vectorized 12257 12701 481 1.3 779.3 1.1X +Native ORC Vectorized (Pushdown) 12108 12785 863 1.3 769.8 1.1X ================================================================================================ Pushdown benchmark for InSet -> InFilters ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 5, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10521 / 10534 1.5 668.9 1.0X -Parquet Vectorized (Pushdown) 677 / 691 23.2 43.1 15.5X -Native ORC Vectorized 6768 / 6776 2.3 430.3 1.6X -Native ORC Vectorized (Pushdown) 501 / 512 31.4 31.8 21.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 5, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10531 / 10538 1.5 669.5 1.0X -Parquet Vectorized (Pushdown) 677 / 718 23.2 43.0 15.6X -Native ORC Vectorized 6765 / 6773 2.3 430.1 1.6X -Native ORC Vectorized (Pushdown) 499 / 507 31.5 31.7 21.1X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> 
InFilters (values count: 5, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10540 / 10553 1.5 670.1 1.0X -Parquet Vectorized (Pushdown) 678 / 710 23.2 43.1 15.5X -Native ORC Vectorized 6787 / 6794 2.3 431.5 1.6X -Native ORC Vectorized (Pushdown) 501 / 509 31.4 31.9 21.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 10, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10551 / 10559 1.5 670.8 1.0X -Parquet Vectorized (Pushdown) 703 / 708 22.4 44.7 15.0X -Native ORC Vectorized 6791 / 6802 2.3 431.7 1.6X -Native ORC Vectorized (Pushdown) 519 / 526 30.3 33.0 20.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 10, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10561 / 10565 1.5 671.4 1.0X -Parquet Vectorized (Pushdown) 711 / 716 22.1 45.2 14.9X -Native ORC Vectorized 6791 / 6806 2.3 431.8 1.6X -Native ORC Vectorized (Pushdown) 529 / 537 29.8 33.6 20.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 10, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10572 / 10590 1.5 672.1 1.0X -Parquet Vectorized (Pushdown) 713 / 716 22.1 45.3 14.8X -Native ORC Vectorized 6808 / 6815 2.3 432.9 1.6X -Native ORC Vectorized 
(Pushdown) 530 / 541 29.7 33.7 19.9X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 50, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10871 / 10882 1.4 691.2 1.0X -Parquet Vectorized (Pushdown) 11104 / 11110 1.4 706.0 1.0X -Native ORC Vectorized 7088 / 7104 2.2 450.7 1.5X -Native ORC Vectorized (Pushdown) 665 / 677 23.6 42.3 16.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 50, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10861 / 10867 1.4 690.5 1.0X -Parquet Vectorized (Pushdown) 11094 / 11099 1.4 705.3 1.0X -Native ORC Vectorized 7075 / 7092 2.2 449.8 1.5X -Native ORC Vectorized (Pushdown) 718 / 733 21.9 45.6 15.1X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 50, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10868 / 10887 1.4 691.0 1.0X -Parquet Vectorized (Pushdown) 11100 / 11106 1.4 705.7 1.0X -Native ORC Vectorized 7087 / 7093 2.2 450.6 1.5X -Native ORC Vectorized (Pushdown) 712 / 731 22.1 45.3 15.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 100, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- 
-Parquet Vectorized 10850 / 10888 1.4 689.8 1.0X -Parquet Vectorized (Pushdown) 11086 / 11105 1.4 704.9 1.0X -Native ORC Vectorized 7090 / 7101 2.2 450.8 1.5X -Native ORC Vectorized (Pushdown) 867 / 882 18.1 55.1 12.5X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 100, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10816 / 10819 1.5 687.7 1.0X -Parquet Vectorized (Pushdown) 11052 / 11059 1.4 702.7 1.0X -Native ORC Vectorized 7037 / 7044 2.2 447.4 1.5X -Native ORC Vectorized (Pushdown) 919 / 931 17.1 58.4 11.8X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -InSet -> InFilters (values count: 100, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10807 / 10815 1.5 687.1 1.0X -Parquet Vectorized (Pushdown) 11047 / 11054 1.4 702.4 1.0X -Native ORC Vectorized 7042 / 7047 2.2 447.7 1.5X -Native ORC Vectorized (Pushdown) 950 / 961 16.6 60.4 11.4X +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 5, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7510 7922 355 2.1 477.5 1.0X +Parquet Vectorized (Pushdown) 477 533 48 33.0 30.3 15.8X +Native ORC Vectorized 5700 6337 717 2.8 362.4 1.3X +Native ORC Vectorized (Pushdown) 547 570 22 28.8 34.8 13.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 
2.90GHz +InSet -> InFilters (values count: 5, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7592 8105 490 2.1 482.7 1.0X +Parquet Vectorized (Pushdown) 478 488 11 32.9 30.4 15.9X +Native ORC Vectorized 5564 5719 102 2.8 353.7 1.4X +Native ORC Vectorized (Pushdown) 634 686 33 24.8 40.3 12.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 5, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7514 7985 436 2.1 477.7 1.0X +Parquet Vectorized (Pushdown) 491 500 6 32.0 31.2 15.3X +Native ORC Vectorized 5459 5630 143 2.9 347.1 1.4X +Native ORC Vectorized (Pushdown) 691 763 49 22.8 43.9 10.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 10, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7524 8103 624 2.1 478.4 1.0X +Parquet Vectorized (Pushdown) 496 502 10 31.7 31.5 15.2X +Native ORC Vectorized 5560 5590 36 2.8 353.5 1.4X +Native ORC Vectorized (Pushdown) 611 660 43 25.7 38.8 12.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 10, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ 
+Parquet Vectorized 7484 7928 412 2.1 475.8 1.0X +Parquet Vectorized (Pushdown) 498 508 14 31.6 31.7 15.0X +Native ORC Vectorized 5646 6189 576 2.8 359.0 1.3X +Native ORC Vectorized (Pushdown) 657 699 38 23.9 41.8 11.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 10, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7717 7997 377 2.0 490.6 1.0X +Parquet Vectorized (Pushdown) 494 506 11 31.8 31.4 15.6X +Native ORC Vectorized 5492 5580 78 2.9 349.2 1.4X +Native ORC Vectorized (Pushdown) 549 614 37 28.7 34.9 14.1X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 50, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7903 8479 364 2.0 502.4 1.0X +Parquet Vectorized (Pushdown) 7854 8040 140 2.0 499.3 1.0X +Native ORC Vectorized 5749 5943 142 2.7 365.5 1.4X +Native ORC Vectorized (Pushdown) 691 728 34 22.8 43.9 11.4X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 50, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7946 8078 86 2.0 505.2 1.0X +Parquet Vectorized (Pushdown) 8074 8237 175 1.9 513.3 1.0X +Native ORC Vectorized 5715 5755 37 2.8 363.3 1.4X +Native ORC Vectorized (Pushdown) 738 795 44 21.3 46.9 10.8X + +Java HotSpot(TM) 64-Bit 
Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 50, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7749 7985 140 2.0 492.7 1.0X +Parquet Vectorized (Pushdown) 7969 8210 228 2.0 506.7 1.0X +Native ORC Vectorized 5801 5949 201 2.7 368.8 1.3X +Native ORC Vectorized (Pushdown) 815 844 26 19.3 51.8 9.5X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 100, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7809 7903 78 2.0 496.5 1.0X +Parquet Vectorized (Pushdown) 7917 7967 43 2.0 503.4 1.0X +Native ORC Vectorized 5744 5824 86 2.7 365.2 1.4X +Native ORC Vectorized (Pushdown) 813 855 38 19.3 51.7 9.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 100, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7728 8038 340 2.0 491.3 1.0X +Parquet Vectorized (Pushdown) 7932 8013 76 2.0 504.3 1.0X +Native ORC Vectorized 5866 6185 355 2.7 373.0 1.3X +Native ORC Vectorized (Pushdown) 1016 1110 150 15.5 64.6 7.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +InSet -> InFilters (values count: 100, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7726 7931 323 2.0 491.2 1.0X +Parquet Vectorized (Pushdown) 7883 7997 71 2.0 501.2 1.0X +Native ORC Vectorized 5711 6000 286 2.8 363.1 1.4X +Native ORC Vectorized (Pushdown) 941 1004 45 16.7 59.8 8.2X ================================================================================================ Pushdown benchmark for tinyint ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 tinyint row (value = CAST(63 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 6034 / 6048 2.6 383.6 1.0X -Parquet Vectorized (Pushdown) 333 / 344 47.2 21.2 18.1X -Native ORC Vectorized 3240 / 3307 4.9 206.0 1.9X -Native ORC Vectorized (Pushdown) 330 / 341 47.6 21.0 18.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 6759 / 6800 2.3 429.7 1.0X -Parquet Vectorized (Pushdown) 1533 / 1537 10.3 97.5 4.4X -Native ORC Vectorized 3863 / 3874 4.1 245.6 1.7X -Native ORC Vectorized (Pushdown) 1235 / 1248 12.7 78.5 5.5X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10247 / 10289 1.5 
651.5 1.0X -Parquet Vectorized (Pushdown) 7430 / 7453 2.1 472.4 1.4X -Native ORC Vectorized 6995 / 7009 2.2 444.7 1.5X -Native ORC Vectorized (Pushdown) 5561 / 5571 2.8 353.6 1.8X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 13949 / 13991 1.1 886.9 1.0X -Parquet Vectorized (Pushdown) 13486 / 13511 1.2 857.4 1.0X -Native ORC Vectorized 10149 / 10186 1.5 645.3 1.4X -Native ORC Vectorized (Pushdown) 9889 / 9905 1.6 628.7 1.4X +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 tinyint row (value = CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 4278 4457 144 3.7 272.0 1.0X +Parquet Vectorized (Pushdown) 250 274 26 63.0 15.9 17.1X +Native ORC Vectorized 2583 2606 25 6.1 164.2 1.7X +Native ORC Vectorized (Pushdown) 294 300 6 53.6 18.7 14.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 4836 4885 33 3.3 307.4 1.0X +Parquet Vectorized (Pushdown) 1158 1170 13 13.6 73.6 4.2X +Native ORC Vectorized 3135 3157 14 5.0 199.3 1.5X +Native ORC Vectorized (Pushdown) 1137 1255 145 13.8 72.3 4.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz 
+Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7498 8062 646 2.1 476.7 1.0X +Parquet Vectorized (Pushdown) 5372 5658 308 2.9 341.5 1.4X +Native ORC Vectorized 5764 5816 63 2.7 366.5 1.3X +Native ORC Vectorized (Pushdown) 4519 4557 41 3.5 287.3 1.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 10166 10259 92 1.5 646.3 1.0X +Parquet Vectorized (Pushdown) 9612 9819 233 1.6 611.1 1.1X +Native ORC Vectorized 8109 8423 543 1.9 515.5 1.3X +Native ORC Vectorized (Pushdown) 8153 8455 309 1.9 518.4 1.2X ================================================================================================ Pushdown benchmark for Timestamp ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 timestamp stored as INT96 row (value = CAST(7864320 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 6307 / 6310 2.5 401.0 1.0X -Parquet Vectorized (Pushdown) 6360 / 6397 2.5 404.3 1.0X -Native ORC Vectorized 2912 / 2917 5.4 185.1 2.2X -Native ORC Vectorized (Pushdown) 138 / 141 114.4 8.7 45.9X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 10% timestamp stored as INT96 
rows (value < CAST(1572864 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 7225 / 7233 2.2 459.4 1.0X -Parquet Vectorized (Pushdown) 7250 / 7255 2.2 461.0 1.0X -Native ORC Vectorized 3772 / 3783 4.2 239.8 1.9X -Native ORC Vectorized (Pushdown) 1277 / 1282 12.3 81.2 5.7X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 50% timestamp stored as INT96 rows (value < CAST(7864320 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10952 / 10965 1.4 696.3 1.0X -Parquet Vectorized (Pushdown) 10985 / 10998 1.4 698.4 1.0X -Native ORC Vectorized 7178 / 7227 2.2 456.3 1.5X -Native ORC Vectorized (Pushdown) 5825 / 5830 2.7 370.3 1.9X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 90% timestamp stored as INT96 rows (value < CAST(14155776 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 14560 / 14583 1.1 925.7 1.0X -Parquet Vectorized (Pushdown) 14608 / 14620 1.1 928.7 1.0X -Native ORC Vectorized 10601 / 10640 1.5 674.0 1.4X -Native ORC Vectorized (Pushdown) 10392 / 10406 1.5 660.7 1.4X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 timestamp stored as TIMESTAMP_MICROS row (value = CAST(7864320 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 5653 / 5658 2.8 359.4 1.0X -Parquet Vectorized (Pushdown) 165 / 169 95.1 
10.5 34.2X -Native ORC Vectorized 2918 / 2921 5.4 185.5 1.9X -Native ORC Vectorized (Pushdown) 137 / 145 114.9 8.7 41.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 10% timestamp stored as TIMESTAMP_MICROS rows (value < CAST(1572864 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 6540 / 6552 2.4 415.8 1.0X -Parquet Vectorized (Pushdown) 1610 / 1614 9.8 102.3 4.1X -Native ORC Vectorized 3775 / 3788 4.2 240.0 1.7X -Native ORC Vectorized (Pushdown) 1274 / 1277 12.3 81.0 5.1X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 50% timestamp stored as TIMESTAMP_MICROS rows (value < CAST(7864320 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10259 / 10278 1.5 652.3 1.0X -Parquet Vectorized (Pushdown) 7591 / 7601 2.1 482.6 1.4X -Native ORC Vectorized 7185 / 7194 2.2 456.8 1.4X -Native ORC Vectorized (Pushdown) 5828 / 5843 2.7 370.6 1.8X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 90% timestamp stored as TIMESTAMP_MICROS rows (value < CAST(14155776 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 13850 / 13868 1.1 880.5 1.0X -Parquet Vectorized (Pushdown) 13433 / 13450 1.2 854.0 1.0X -Native ORC Vectorized 10635 / 10669 1.5 676.1 1.3X -Native ORC Vectorized (Pushdown) 10437 / 10448 1.5 663.6 1.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 timestamp 
stored as TIMESTAMP_MILLIS row (value = CAST(7864320 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 5884 / 5888 2.7 374.1 1.0X -Parquet Vectorized (Pushdown) 166 / 170 94.7 10.6 35.4X -Native ORC Vectorized 2913 / 2916 5.4 185.2 2.0X -Native ORC Vectorized (Pushdown) 136 / 144 115.4 8.7 43.2X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value < CAST(1572864 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 6763 / 6776 2.3 430.0 1.0X -Parquet Vectorized (Pushdown) 1634 / 1638 9.6 103.9 4.1X -Native ORC Vectorized 3777 / 3785 4.2 240.1 1.8X -Native ORC Vectorized (Pushdown) 1276 / 1279 12.3 81.2 5.3X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value < CAST(7864320 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 10460 / 10469 1.5 665.0 1.0X -Parquet Vectorized (Pushdown) 7689 / 7698 2.0 488.9 1.4X -Native ORC Vectorized 7190 / 7197 2.2 457.1 1.5X -Native ORC Vectorized (Pushdown) 5820 / 5834 2.7 370.0 1.8X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value < CAST(14155776 AS timestamp)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 14033 / 14039 1.1 892.2 1.0X -Parquet 
Vectorized (Pushdown) 13608 / 13636 1.2 865.2 1.0X -Native ORC Vectorized 10635 / 10686 1.5 676.2 1.3X -Native ORC Vectorized (Pushdown) 10420 / 10442 1.5 662.5 1.3X +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 timestamp stored as INT96 row (value = CAST(7864320 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 4399 4452 33 3.6 279.7 1.0X +Parquet Vectorized (Pushdown) 4470 4605 135 3.5 284.2 1.0X +Native ORC Vectorized 2463 2489 21 6.4 156.6 1.8X +Native ORC Vectorized (Pushdown) 139 147 11 113.3 8.8 31.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 10% timestamp stored as INT96 rows (value < CAST(1572864 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 5352 5439 90 2.9 340.2 1.0X +Parquet Vectorized (Pushdown) 5392 5550 157 2.9 342.8 1.0X +Native ORC Vectorized 3167 3232 56 5.0 201.3 1.7X +Native ORC Vectorized (Pushdown) 1112 1123 14 14.1 70.7 4.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 50% timestamp stored as INT96 rows (value < CAST(7864320 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 8098 8213 105 1.9 514.9 1.0X +Parquet Vectorized (Pushdown) 8057 8150 105 2.0 512.2 1.0X +Native ORC Vectorized 5891 6230 261 2.7 374.5 1.4X +Native ORC Vectorized (Pushdown) 4699 4701 2 3.3 298.8 1.7X + +Java HotSpot(TM) 
64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 90% timestamp stored as INT96 rows (value < CAST(14155776 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 11066 11205 139 1.4 703.5 1.0X +Parquet Vectorized (Pushdown) 11112 11198 112 1.4 706.5 1.0X +Native ORC Vectorized 8515 8727 265 1.8 541.4 1.3X +Native ORC Vectorized (Pushdown) 8437 8583 139 1.9 536.4 1.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 timestamp stored as TIMESTAMP_MICROS row (value = CAST(7864320 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 3992 4046 36 3.9 253.8 1.0X +Parquet Vectorized (Pushdown) 140 160 15 112.6 8.9 28.6X +Native ORC Vectorized 2505 2559 37 6.3 159.3 1.6X +Native ORC Vectorized (Pushdown) 162 203 34 97.0 10.3 24.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 10% timestamp stored as TIMESTAMP_MICROS rows (value < CAST(1572864 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 5711 6297 669 2.8 363.1 1.0X +Parquet Vectorized (Pushdown) 1361 1432 80 11.6 86.5 4.2X +Native ORC Vectorized 3161 3201 53 5.0 201.0 1.8X +Native ORC Vectorized (Pushdown) 1080 1101 22 14.6 68.6 5.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 50% timestamp stored as TIMESTAMP_MICROS rows (value < 
CAST(7864320 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7536 7606 67 2.1 479.1 1.0X +Parquet Vectorized (Pushdown) 5648 5722 72 2.8 359.1 1.3X +Native ORC Vectorized 5892 5994 73 2.7 374.6 1.3X +Native ORC Vectorized (Pushdown) 4785 4812 28 3.3 304.2 1.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 90% timestamp stored as TIMESTAMP_MICROS rows (value < CAST(14155776 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 10681 11005 301 1.5 679.1 1.0X +Parquet Vectorized (Pushdown) 10423 10802 354 1.5 662.7 1.0X +Native ORC Vectorized 8518 8644 150 1.8 541.6 1.3X +Native ORC Vectorized (Pushdown) 8348 8725 316 1.9 530.7 1.3X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 timestamp stored as TIMESTAMP_MILLIS row (value = CAST(7864320 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 4131 4211 81 3.8 262.6 1.0X +Parquet Vectorized (Pushdown) 126 138 10 124.7 8.0 32.8X +Native ORC Vectorized 2419 2488 41 6.5 153.8 1.7X +Native ORC Vectorized (Pushdown) 144 163 21 109.4 9.1 28.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value < CAST(1572864 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 5153 6281 798 3.1 327.6 1.0X +Parquet Vectorized (Pushdown) 1298 1327 22 12.1 82.5 4.0X +Native ORC Vectorized 3153 3161 10 5.0 200.5 1.6X +Native ORC Vectorized (Pushdown) 1082 1096 16 14.5 68.8 4.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value < CAST(7864320 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 7488 7676 235 2.1 476.1 1.0X +Parquet Vectorized (Pushdown) 5525 5730 168 2.8 351.3 1.4X +Native ORC Vectorized 5824 5887 47 2.7 370.3 1.3X +Native ORC Vectorized (Pushdown) 4771 4934 240 3.3 303.3 1.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value < CAST(14155776 AS timestamp)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 10954 11454 608 1.4 696.4 1.0X +Parquet Vectorized (Pushdown) 10483 10666 158 1.5 666.5 1.0X +Native ORC Vectorized 8698 9089 347 1.8 553.0 1.3X +Native ORC Vectorized (Pushdown) 8693 8849 152 1.8 552.7 1.3X ================================================================================================ Pushdown benchmark with many filters ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 row with 1 filters: Best/Avg Time(ms) Rate(M/s) Per 
Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 319 / 323 0.0 318789986.0 1.0X -Parquet Vectorized (Pushdown) 323 / 347 0.0 322755287.0 1.0X -Native ORC Vectorized 316 / 336 0.0 315670745.0 1.0X -Native ORC Vectorized (Pushdown) 317 / 320 0.0 317392594.0 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 row with 250 filters: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 2192 / 2218 0.0 2191883823.0 1.0X -Parquet Vectorized (Pushdown) 2675 / 2687 0.0 2675439029.0 0.8X -Native ORC Vectorized 2158 / 2162 0.0 2157646071.0 1.0X -Native ORC Vectorized (Pushdown) 2309 / 2326 0.0 2309096612.0 0.9X - -OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Select 1 row with 500 filters: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Parquet Vectorized 6219 / 6248 0.0 6218727737.0 1.0X -Parquet Vectorized (Pushdown) 7376 / 7436 0.0 7375977710.0 0.8X -Native ORC Vectorized 6252 / 6279 0.0 6252473320.0 1.0X -Native ORC Vectorized (Pushdown) 6858 / 6876 0.0 6857854486.0 0.9X +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 row with 1 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 176 182 6 0.0 175773031.0 1.0X +Parquet Vectorized (Pushdown) 176 183 9 0.0 175756838.0 1.0X +Native ORC Vectorized 166 171 5 0.0 166445958.0 1.1X +Native ORC Vectorized (Pushdown) 164 177 11 0.0 163698943.0 1.1X + 
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 row with 250 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 899 925 18 0.0 898922847.0 1.0X +Parquet Vectorized (Pushdown) 1009 1032 40 0.0 1008502749.0 0.9X +Native ORC Vectorized 895 917 15 0.0 894757505.0 1.0X +Native ORC Vectorized (Pushdown) 873 879 8 0.0 872941160.0 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select 1 row with 500 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Parquet Vectorized 2541 2745 244 0.0 2540810863.0 1.0X +Parquet Vectorized (Pushdown) 2814 2878 54 0.0 2813909827.0 0.9X +Native ORC Vectorized 2422 2470 50 0.0 2422264909.0 1.0X +Native ORC Vectorized (Pushdown) 2500 2544 39 0.0 2499656374.0 1.0X + + +================================================================================================ +Predicate conversion benchmark with unbalanced Column +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Convert filters to ORC filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Convert 2500 filters to ORC filter 2 2 0 93.0 10.8 1.0X +Convert 5000 filters to ORC filter 3 4 1 47.8 20.9 0.5X +Convert 15000 filters to ORC filter 13 14 2 12.0 83.1 0.1X + + 
+================================================================================================ +Pushdown benchmark with unbalanced Column +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14.5 +Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz +Select data with filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Convert 100 filters to ORC filter 195 215 19 0.8 1220.7 1.0X +Convert 100 filters to Parquet filter 525 540 21 0.3 3280.5 0.4X +Convert 500 filters to ORC filter 940 999 60 0.2 5877.7 0.2X +Convert 500 filters to Parquet filter 1370 1452 98 0.1 8565.0 0.1X +Convert 1000 filters to ORC filter 4516 4758 182 0.0 28222.9 0.0X +Convert 1000 filters to Parquet filter 5456 5615 151 0.0 34100.6 0.0X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index b04024371713..13c2df2afcc6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -24,12 +24,16 @@ import scala.util.Random import org.apache.spark.SparkConf import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.internal.config.UI._ -import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.{Column, DataFrame, SparkSession} +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, Literal, Or} import org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.functions.monotonically_increasing_id 
+import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.execution.datasources.orc.OrcFilters +import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType -import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType, TimestampType} +import org.apache.spark.sql.types._ /** * Benchmark to measure read performance with Filter pushdown. @@ -135,6 +139,33 @@ object FilterPushdownBenchmark extends BenchmarkBase with SQLHelper { benchmark.run() } + def filterPushDownBenchmarkWithColumn( + benchmark: Benchmark, + numFilter: Int, + values: Int, + whereColumn: Column, + selectExpr: String = "*" + ): Unit = { + benchmark.addCase(s"Convert $numFilter filters to ORC filter") { _ => + withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true") { + spark + .table("orcTable") + .select(selectExpr) + .filter(whereColumn) + .collect() + } + } + benchmark.addCase(s"Convert $numFilter filters to Parquet filter") { _ => + withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED.key -> "true") { + spark + .table("parquetTable") + .select(selectExpr) + .filter(whereColumn) + .collect() + } + } + } + private def runIntBenchmark(numRows: Int, width: Int, mid: Int): Unit = { Seq("value IS NULL", s"$mid < value AND value < $mid").foreach { whereExpr => val title = s"Select 0 int row ($whereExpr)".replace("value AND value", "value") @@ -174,10 +205,10 @@ object FilterPushdownBenchmark extends BenchmarkBase with SQLHelper { private def runStringBenchmark( numRows: Int, width: Int, searchValue: Int, colType: String): Unit = { Seq("value IS NULL", s"'$searchValue' < value AND value < '$searchValue'") - .foreach { whereExpr => - val title = s"Select 0 $colType row ($whereExpr)".replace("value AND value", "value") - filterPushDownBenchmark(numRows, title, whereExpr) - } + .foreach { whereExpr => + val title = s"Select 0 $colType row 
($whereExpr)".replace("value AND value", "value") + filterPushDownBenchmark(numRows, title, whereExpr) + } Seq( s"value = '$searchValue'", @@ -378,5 +409,60 @@ object FilterPushdownBenchmark extends BenchmarkBase with SQLHelper { } } } + + runBenchmark(s"Predicate conversion benchmark with unbalanced Column") { + // This benchmark tests a very isolated part of the predicate pushdown process - specifically, + // the individual action of converting a Spark `Expression` to an ORC `SearchArgument`. + // This results in more granular numbers that can help highlight small performance + // differences in this part of the code that would be hidden by slower components that + // get run when a full Spark job is executed. + // The benchmark below runs a more complete, end-to-end test which covers the whole pipeline + // and can uncover high-level performance problems, but is bad at discriminating details. + val numRows = 160000 + val width = 2000 + + val columns = (1 to width).map(i => s"id c$i") + val df = spark.range(numRows).selectExpr(columns: _*) + val benchmark = new Benchmark( + s"Convert filters to ORC filter", + numRows, minNumIters = 5, output = output) + Seq(2500, 5000, 15000).foreach { numFilter => + val whereColumn = (1 to numFilter) + .map(i => col("c1") === lit(i)) + .foldLeft(lit(false))(_ || _) + + benchmark.addCase(s"Convert $numFilter filters to ORC filter") { _ => + OrcFilters.createFilter(df.schema, + DataSourceStrategy.translateFilter(whereColumn.expr).toSeq) + } + } + benchmark.run() + } + + runBenchmark(s"Pushdown benchmark with unbalanced Column") { + val numRows = 160000 + val width = 200 + + withTempPath { dir => + val columns = (1 to width).map(i => s"id c$i") + val df = spark.range(numRows).selectExpr(columns: _*) + withTempTable("orcTable", "parquetTable") { + saveAsTable(df, dir) + val benchmark = + new Benchmark("Select data with filters", numRows, minNumIters = 5, output = output) + Seq(100, 500, 1000).foreach { numFilter => + val whereColumn 
= (1 to numFilter) + .map(i => col("c1") === lit(i)) + .foldLeft(lit(false))(_ || _) + filterPushDownBenchmarkWithColumn( + benchmark, + numFilter, + numRows, + whereColumn) + } + benchmark.run() + } + } + } } } diff --git a/sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala index 9e4bf22ff7e3..61143df24346 100644 --- a/sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ b/sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -66,7 +66,9 @@ private[sql] object OrcFilters extends OrcFiltersBase { for { // Combines all convertible filters using `And` to produce a single conjunction conjunction <- buildTree(convertibleFilters(schema, dataTypeMap, filters)) - // Then tries to build a single ORC `SearchArgument` for the conjunction predicate + // Then tries to build a single ORC `SearchArgument` for the conjunction predicate. + // The input predicate is fully convertible. There should not be any empty result in the + // following recursive method call `buildSearchArgument`. builder <- buildSearchArgument(dataTypeMap, conjunction, newBuilder) } yield builder.build() } @@ -80,6 +82,17 @@ private[sql] object OrcFilters extends OrcFiltersBase { def convertibleFiltersHelper( filter: Filter, canPartialPushDown: Boolean): Option[Filter] = filter match { + // Here, it is not safe to just convert one side and remove the other side + // if we do not understand what the parent filters are. + // + // Here is an example used to explain the reason. + // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to + // convert b in ('1'). If we only convert a = 2, we will end up with a filter + // NOT(a = 2), which will generate wrong results.
+ // + // Pushing one side of AND down is only safe to do at the top level or in the child + // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate + // can be safely removed. case And(left, right) => val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown) val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown) @@ -90,6 +103,17 @@ private[sql] object OrcFilters extends OrcFiltersBase { case _ => None } + // The Or predicate is convertible when both of its children can be pushed down. + // That is to say, if one/both of the children can be partially pushed down, the Or + // predicate can be partially pushed down as well. + // + // Here is an example used to explain the reason. + // Let's say we have + // (a1 AND a2) OR (b1 AND b2), + // a1 and b1 are convertible, while a2 and b2 are not. + // The predicate can be converted as + // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2) + // As per the logic in the And predicate, we can push down (a1 OR b1). case Or(left, right) => val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown) val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown) @@ -150,23 +174,19 @@ private[sql] object OrcFilters extends OrcFiltersBase { dataTypeMap: Map[String, DataType], expression: Filter, builder: Builder): Option[Builder] = { - createBuilder(dataTypeMap, expression, builder, canPartialPushDownConjuncts = true) + createBuilder(dataTypeMap, expression, builder) } /** * @param dataTypeMap a map from the attribute name to its data type. * @param expression the input filter predicates. * @param builder the input SearchArgument.Builder. - * @param canPartialPushDownConjuncts whether a subset of conjuncts of predicates can be pushed - * down safely. Pushing ONLY one side of AND down is safe to - * do at the top level or none of its ancestors is NOT and OR. * @return the builder so far.
*/ private def createBuilder( dataTypeMap: Map[String, DataType], expression: Filter, - builder: Builder, - canPartialPushDownConjuncts: Boolean): Option[Builder] = { + builder: Builder): Option[Builder] = { def getType(attribute: String): PredicateLeaf.Type = getPredicateLeafType(dataTypeMap(attribute)) @@ -174,63 +194,20 @@ private[sql] object OrcFilters extends OrcFiltersBase { expression match { case And(left, right) => - // At here, it is not safe to just convert one side and remove the other side - // if we do not understand what the parent filters are. - // - // Here is an example used to explain the reason. - // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to - // convert b in ('1'). If we only convert a = 2, we will end up with a filter - // NOT(a = 2), which will generate wrong results. - // - // Pushing one side of AND down is only safe to do at the top level or in the child - // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate - // can be safely removed. 
- val leftBuilderOption = - createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts) - val rightBuilderOption = - createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts) - (leftBuilderOption, rightBuilderOption) match { - case (Some(_), Some(_)) => - for { - lhs <- createBuilder(dataTypeMap, left, - builder.startAnd(), canPartialPushDownConjuncts) - rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts) - } yield rhs.end() - - case (Some(_), None) if canPartialPushDownConjuncts => - createBuilder(dataTypeMap, left, builder, canPartialPushDownConjuncts) - - case (None, Some(_)) if canPartialPushDownConjuncts => - createBuilder(dataTypeMap, right, builder, canPartialPushDownConjuncts) - - case _ => None - } + for { + lhs <- createBuilder(dataTypeMap, left, builder.startAnd()) + rhs <- createBuilder(dataTypeMap, right, lhs) + } yield rhs.end() case Or(left, right) => - // The Or predicate is convertible when both of its children can be pushed down. - // That is to say, if one/both of the children can be partially pushed down, the Or - // predicate can be partially pushed down as well. - // - // Here is an example used to explain the reason. - // Let's say we have - // (a1 AND a2) OR (b1 AND b2), - // a1 and b1 is convertible, while a2 and b2 is not. - // The predicate can be converted as - // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2) - // As per the logical in And predicate, we can push down (a1 OR b1). 
for { - _ <- createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts) - _ <- createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts) - lhs <- createBuilder(dataTypeMap, left, - builder.startOr(), canPartialPushDownConjuncts) - rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts) + lhs <- createBuilder(dataTypeMap, left, builder.startOr()) + rhs <- createBuilder(dataTypeMap, right, lhs) } yield rhs.end() case Not(child) => for { - _ <- createBuilder(dataTypeMap, child, newBuilder, canPartialPushDownConjuncts = false) - negate <- createBuilder(dataTypeMap, - child, builder.startNot(), canPartialPushDownConjuncts = false) + negate <- createBuilder(dataTypeMap, child, builder.startNot()) } yield negate.end() // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` diff --git a/sql/core/v2.3.5/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v2.3.5/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala index 632a72a32abd..68329e2902ee 100644 --- a/sql/core/v2.3.5/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ b/sql/core/v2.3.5/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -66,7 +66,9 @@ private[sql] object OrcFilters extends OrcFiltersBase { for { // Combines all convertible filters using `And` to produce a single conjunction conjunction <- buildTree(convertibleFilters(schema, dataTypeMap, filters)) - // Then tries to build a single ORC `SearchArgument` for the conjunction predicate + // Then tries to build a single ORC `SearchArgument` for the conjunction predicate. + // The input predicate is fully convertible. There should not be any empty result in the + // following recursive method call `buildSearchArgument`. 
builder <- buildSearchArgument(dataTypeMap, conjunction, newBuilder) } yield builder.build() } @@ -80,6 +82,17 @@ private[sql] object OrcFilters extends OrcFiltersBase { def convertibleFiltersHelper( filter: Filter, canPartialPushDown: Boolean): Option[Filter] = filter match { + // Here, it is not safe to just convert one side and remove the other side + // if we do not understand what the parent filters are. + // + // Here is an example used to explain the reason. + // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to + // convert b in ('1'). If we only convert a = 2, we will end up with a filter + // NOT(a = 2), which will generate wrong results. + // + // Pushing one side of AND down is only safe to do at the top level or in the child + // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate + // can be safely removed. case And(left, right) => val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown) val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown) @@ -90,6 +103,17 @@ private[sql] object OrcFilters extends OrcFiltersBase { case _ => None } + // The Or predicate is convertible when both of its children can be pushed down. + // That is to say, if one/both of the children can be partially pushed down, the Or + // predicate can be partially pushed down as well. + // + // Here is an example used to explain the reason. + // Let's say we have + // (a1 AND a2) OR (b1 AND b2), + // a1 and b1 are convertible, while a2 and b2 are not. + // The predicate can be converted as + // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2) + // As per the logic in the And predicate, we can push down (a1 OR b1).
case Or(left, right) => val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown) val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown) @@ -150,23 +174,19 @@ private[sql] object OrcFilters extends OrcFiltersBase { dataTypeMap: Map[String, DataType], expression: Filter, builder: Builder): Option[Builder] = { - createBuilder(dataTypeMap, expression, builder, canPartialPushDownConjuncts = true) + createBuilder(dataTypeMap, expression, builder) } /** * @param dataTypeMap a map from the attribute name to its data type. * @param expression the input filter predicates. * @param builder the input SearchArgument.Builder. - * @param canPartialPushDownConjuncts whether a subset of conjuncts of predicates can be pushed - * down safely. Pushing ONLY one side of AND down is safe to - * do at the top level or none of its ancestors is NOT and OR. * @return the builder so far. */ private def createBuilder( dataTypeMap: Map[String, DataType], expression: Filter, - builder: Builder, - canPartialPushDownConjuncts: Boolean): Option[Builder] = { + builder: Builder): Option[Builder] = { def getType(attribute: String): PredicateLeaf.Type = getPredicateLeafType(dataTypeMap(attribute)) @@ -174,62 +194,20 @@ private[sql] object OrcFilters extends OrcFiltersBase { expression match { case And(left, right) => - // At here, it is not safe to just convert one side and remove the other side - // if we do not understand what the parent filters are. - // - // Here is an example used to explain the reason. - // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to - // convert b in ('1'). If we only convert a = 2, we will end up with a filter - // NOT(a = 2), which will generate wrong results. - // - // Pushing one side of AND down is only safe to do at the top level or in the child - // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate - // can be safely removed. 
- val leftBuilderOption = - createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts) - val rightBuilderOption = - createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts) - (leftBuilderOption, rightBuilderOption) match { - case (Some(_), Some(_)) => - for { - lhs <- createBuilder(dataTypeMap, left, - builder.startAnd(), canPartialPushDownConjuncts) - rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts) - } yield rhs.end() - - case (Some(_), None) if canPartialPushDownConjuncts => - createBuilder(dataTypeMap, left, builder, canPartialPushDownConjuncts) - - case (None, Some(_)) if canPartialPushDownConjuncts => - createBuilder(dataTypeMap, right, builder, canPartialPushDownConjuncts) - - case _ => None - } + for { + lhs <- createBuilder(dataTypeMap, left, builder.startAnd()) + rhs <- createBuilder(dataTypeMap, right, lhs) + } yield rhs.end() case Or(left, right) => - // The Or predicate is convertible when both of its children can be pushed down. - // That is to say, if one/both of the children can be partially pushed down, the Or - // predicate can be partially pushed down as well. - // - // Here is an example used to explain the reason. - // Let's say we have - // (a1 AND a2) OR (b1 AND b2), - // a1 and b1 is convertible, while a2 and b2 is not. - // The predicate can be converted as - // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2) - // As per the logical in And predicate, we can push down (a1 OR b1). 
for { - _ <- createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts) - _ <- createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts) - lhs <- createBuilder(dataTypeMap, left, builder.startOr(), canPartialPushDownConjuncts) - rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts) + lhs <- createBuilder(dataTypeMap, left, builder.startOr()) + rhs <- createBuilder(dataTypeMap, right, lhs) } yield rhs.end() case Not(child) => for { - _ <- createBuilder(dataTypeMap, child, newBuilder, canPartialPushDownConjuncts = false) - negate <- createBuilder(dataTypeMap, - child, builder.startNot(), canPartialPushDownConjuncts = false) + negate <- createBuilder(dataTypeMap, child, builder.startNot()) } yield negate.end() // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`