Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
================================================================================================
Benchmark for MurMurHash 3 and xxHash64
================================================================================================

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash byte arrays with length 8: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 16 / 16 127.7 7.8 1.0X
xxHash 64-bit 23 / 23 90.7 11.0 0.7X
HiveHasher 16 / 16 134.8 7.4 1.1X

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash byte arrays with length 16: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 26 / 26 79.5 12.6 1.0X
xxHash 64-bit 26 / 27 79.3 12.6 1.0X
HiveHasher 30 / 30 70.1 14.3 0.9X

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash byte arrays with length 24: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 36 / 36 58.1 17.2 1.0X
xxHash 64-bit 30 / 30 70.2 14.2 1.2X
HiveHasher 45 / 45 46.4 21.5 0.8X

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash byte arrays with length 31: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 50 / 50 41.8 23.9 1.0X
xxHash 64-bit 43 / 43 49.3 20.3 1.2X
HiveHasher 58 / 58 35.9 27.8 0.9X

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash byte arrays with length 95: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 132 / 132 15.9 62.7 1.0X
xxHash 64-bit 79 / 79 26.7 37.5 1.7X
HiveHasher 198 / 199 10.6 94.6 0.7X

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash byte arrays with length 287: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 334 / 334 6.3 159.3 1.0X
xxHash 64-bit 126 / 126 16.7 59.9 2.7X
HiveHasher 633 / 634 3.3 302.0 0.5X

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash byte arrays with length 1055: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 1149 / 1149 1.8 547.9 1.0X
xxHash 64-bit 327 / 327 6.4 155.9 3.5X
HiveHasher 2338 / 2346 0.9 1114.6 0.5X

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash byte arrays with length 2079: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 2215 / 2216 0.9 1056.1 1.0X
xxHash 64-bit 554 / 554 3.8 264.0 4.0X
HiveHasher 4609 / 4609 0.5 2197.5 0.5X

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash byte arrays with length 8223: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 8633 / 8643 0.2 4116.3 1.0X
xxHash 64-bit 1891 / 1892 1.1 901.6 4.6X
HiveHasher 18206 / 18206 0.1 8681.3 0.5X


Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,24 @@ package org.apache.spark.sql

import java.util.Random

import org.apache.spark.benchmark.Benchmark
import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
import org.apache.spark.sql.catalyst.expressions.{HiveHasher, XXH64}
import org.apache.spark.unsafe.Platform
import org.apache.spark.unsafe.hash.Murmur3_x86_32

/**
* Synthetic benchmark for MurMurHash 3 and xxHash64.
* To run this benchmark:
* {{{
* 1. without sbt:
* bin/spark-submit --class <this class> --jars <spark core test jar> <spark catalyst test jar>
* 2. build/sbt "catalyst/test:runMain <this class>"
* 3. generate result:
* SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain <this class>"
* Results will be written to "benchmarks/HashByteArrayBenchmark-results.txt".
* }}}
*/
object HashByteArrayBenchmark {
object HashByteArrayBenchmark extends BenchmarkBase {
def test(length: Int, seed: Long, numArrays: Int, iters: Int): Unit = {
val random = new Random(seed)
val arrays = Array.fill[Array[Byte]](numArrays) {
Expand All @@ -36,8 +45,8 @@ object HashByteArrayBenchmark {
bytes
}

val benchmark =
new Benchmark("Hash byte arrays with length " + length, iters * numArrays.toLong)
val benchmark = new Benchmark(
"Hash byte arrays with length " + length, iters * numArrays.toLong, output = output)
benchmark.addCase("Murmur3_x86_32") { _: Int =>
var sum = 0L
for (_ <- 0L until iters) {
Expand Down Expand Up @@ -74,96 +83,17 @@ object HashByteArrayBenchmark {
benchmark.run()
}

def main(args: Array[String]): Unit = {
/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash byte arrays with length 8: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 12 / 16 174.3 5.7 1.0X
xxHash 64-bit 17 / 22 120.0 8.3 0.7X
HiveHasher 13 / 15 162.1 6.2 0.9X
*/
test(8, 42L, 1 << 10, 1 << 11)

/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash byte arrays with length 16: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 19 / 22 107.6 9.3 1.0X
xxHash 64-bit 20 / 24 104.6 9.6 1.0X
HiveHasher 24 / 28 87.0 11.5 0.8X
*/
test(16, 42L, 1 << 10, 1 << 11)

/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash byte arrays with length 24: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 28 / 32 74.8 13.4 1.0X
xxHash 64-bit 24 / 29 87.3 11.5 1.2X
HiveHasher 36 / 41 57.7 17.3 0.8X
*/
test(24, 42L, 1 << 10, 1 << 11)

// Add 31 to all arrays to create worst-case alignment for xxHash.
/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash byte arrays with length 31: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 41 / 45 51.1 19.6 1.0X
xxHash 64-bit 36 / 44 58.8 17.0 1.2X
HiveHasher 49 / 54 42.6 23.5 0.8X
*/
test(31, 42L, 1 << 10, 1 << 11)

/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash byte arrays with length 95: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 100 / 110 21.0 47.7 1.0X
xxHash 64-bit 74 / 78 28.2 35.5 1.3X
HiveHasher 189 / 196 11.1 90.3 0.5X
*/
test(64 + 31, 42L, 1 << 10, 1 << 11)

/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash byte arrays with length 287: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 299 / 311 7.0 142.4 1.0X
xxHash 64-bit 113 / 122 18.5 54.1 2.6X
HiveHasher 620 / 624 3.4 295.5 0.5X
*/
test(256 + 31, 42L, 1 << 10, 1 << 11)

/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash byte arrays with length 1055: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 1068 / 1070 2.0 509.1 1.0X
xxHash 64-bit 306 / 315 6.9 145.9 3.5X
HiveHasher 2316 / 2369 0.9 1104.3 0.5X
*/
test(1024 + 31, 42L, 1 << 10, 1 << 11)

/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash byte arrays with length 2079: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 2252 / 2274 0.9 1074.1 1.0X
xxHash 64-bit 534 / 580 3.9 254.6 4.2X
HiveHasher 4739 / 4786 0.4 2259.8 0.5X
*/
test(2048 + 31, 42L, 1 << 10, 1 << 11)

/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash byte arrays with length 8223: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Murmur3_x86_32 9249 / 9586 0.2 4410.5 1.0X
xxHash 64-bit 2897 / 3241 0.7 1381.6 3.2X
HiveHasher 19392 / 20211 0.1 9246.6 0.5X
*/
test(8192 + 31, 42L, 1 << 10, 1 << 11)
/**
 * Runs the hash benchmark once for every byte-array length under test, smallest first.
 * Every run passes the same seed (42), array count (1 << 10) and iteration count (1 << 11)
 * to `test`; only the array length varies.
 */
override def runBenchmarkSuite(): Unit = {
  runBenchmark("Benchmark for MurMurHash 3 and xxHash64") {
    // The larger lengths are written as "N + 31" so the 31-byte offset stays explicit.
    val lengths = Seq(8, 16, 24, 31, 64 + 31, 256 + 31, 1024 + 31, 2048 + 31, 8192 + 31)
    lengths.foreach { length =>
      test(length, 42L, 1 << 10, 1 << 11)
    }
  }
}
}