Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions sql/catalyst/benchmarks/HashBenchmark-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
================================================================================================
single ints
================================================================================================

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash For single ints: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 5615 / 5616 95.6 10.5 1.0X
codegen version 8400 / 8407 63.9 15.6 0.7X
codegen version 64-bit 8139 / 8145 66.0 15.2 0.7X
codegen HiveHash version 7213 / 7348 74.4 13.4 0.8X


================================================================================================
single longs
================================================================================================

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash For single longs: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 6053 / 6054 88.7 11.3 1.0X
codegen version 9367 / 9369 57.3 17.4 0.6X
codegen version 64-bit 8041 / 8051 66.8 15.0 0.8X
codegen HiveHash version 7546 / 7575 71.1 14.1 0.8X


================================================================================================
normal
================================================================================================

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash For normal: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 3181 / 3182 0.7 1517.0 1.0X
codegen version 2403 / 2403 0.9 1145.7 1.3X
codegen version 64-bit 915 / 916 2.3 436.2 3.5X
codegen HiveHash version 4505 / 4527 0.5 2148.3 0.7X


================================================================================================
array
================================================================================================

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash For array: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 1828 / 1844 0.1 13946.1 1.0X
codegen version 3678 / 3804 0.0 28058.2 0.5X
codegen version 64-bit 2925 / 2931 0.0 22317.8 0.6X
codegen HiveHash version 1216 / 1217 0.1 9280.0 1.5X


================================================================================================
map
================================================================================================

OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
Hash For map: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 0 / 0 44.3 22.6 1.0X
codegen version 176 / 176 0.0 42978.8 0.0X
codegen version 64-bit 173 / 175 0.0 42214.3 0.0X
codegen HiveHash version 44 / 44 0.1 10659.9 0.0X


152 changes: 59 additions & 93 deletions sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql

import org.apache.spark.benchmark.Benchmark
import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateSafeProjection
Expand All @@ -26,94 +26,87 @@ import org.apache.spark.sql.types._
/**
* Benchmark for the previous interpreted hash function (InternalRow.hashCode) vs codegened
* hash expressions (Murmur3Hash/XxHash64/HiveHash).
* To run this benchmark:
* {{{
* 1. without sbt:
* bin/spark-submit --class <this class> --jars <spark core test jar> <spark catalyst test jar>
* 2. build/sbt "catalyst/test:runMain <this class>"
* 3. generate result:
* SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain <this class>"
* Results will be written to "benchmarks/HashBenchmark-results.txt".
* }}}
*/
object HashBenchmark {
object HashBenchmark extends BenchmarkBase {

def test(name: String, schema: StructType, numRows: Int, iters: Int): Unit = {
val generator = RandomDataGenerator.forType(schema, nullable = false).get
val encoder = RowEncoder(schema)
val attrs = schema.toAttributes
val safeProjection = GenerateSafeProjection.generate(attrs, attrs)
runBenchmark(name) {
val generator = RandomDataGenerator.forType(schema, nullable = false).get
val encoder = RowEncoder(schema)
val attrs = schema.toAttributes
val safeProjection = GenerateSafeProjection.generate(attrs, attrs)

val rows = (1 to numRows).map(_ =>
// The output of encoder is UnsafeRow, use safeProjection to turn it into safe format.
safeProjection(encoder.toRow(generator().asInstanceOf[Row])).copy()
).toArray
val rows = (1 to numRows).map(_ =>
// The output of encoder is UnsafeRow, use safeProjection to turn it into safe format.
safeProjection(encoder.toRow(generator().asInstanceOf[Row])).copy()
).toArray

val benchmark = new Benchmark("Hash For " + name, iters * numRows.toLong)
benchmark.addCase("interpreted version") { _: Int =>
var sum = 0
for (_ <- 0L until iters) {
var i = 0
while (i < numRows) {
sum += rows(i).hashCode()
i += 1
val benchmark = new Benchmark("Hash For " + name, iters * numRows.toLong, output = output)
benchmark.addCase("interpreted version") { _: Int =>
var sum = 0
for (_ <- 0L until iters) {
var i = 0
while (i < numRows) {
sum += rows(i).hashCode()
i += 1
}
}
}
}

val getHashCode = UnsafeProjection.create(new Murmur3Hash(attrs) :: Nil, attrs)
benchmark.addCase("codegen version") { _: Int =>
var sum = 0
for (_ <- 0L until iters) {
var i = 0
while (i < numRows) {
sum += getHashCode(rows(i)).getInt(0)
i += 1
val getHashCode = UnsafeProjection.create(new Murmur3Hash(attrs) :: Nil, attrs)
benchmark.addCase("codegen version") { _: Int =>
var sum = 0
for (_ <- 0L until iters) {
var i = 0
while (i < numRows) {
sum += getHashCode(rows(i)).getInt(0)
i += 1
}
}
}
}

val getHashCode64b = UnsafeProjection.create(new XxHash64(attrs) :: Nil, attrs)
benchmark.addCase("codegen version 64-bit") { _: Int =>
var sum = 0
for (_ <- 0L until iters) {
var i = 0
while (i < numRows) {
sum += getHashCode64b(rows(i)).getInt(0)
i += 1
val getHashCode64b = UnsafeProjection.create(new XxHash64(attrs) :: Nil, attrs)
benchmark.addCase("codegen version 64-bit") { _: Int =>
var sum = 0
for (_ <- 0L until iters) {
var i = 0
while (i < numRows) {
sum += getHashCode64b(rows(i)).getInt(0)
i += 1
}
}
}
}

val getHiveHashCode = UnsafeProjection.create(new HiveHash(attrs) :: Nil, attrs)
benchmark.addCase("codegen HiveHash version") { _: Int =>
var sum = 0
for (_ <- 0L until iters) {
var i = 0
while (i < numRows) {
sum += getHiveHashCode(rows(i)).getInt(0)
i += 1
val getHiveHashCode = UnsafeProjection.create(new HiveHash(attrs) :: Nil, attrs)
benchmark.addCase("codegen HiveHash version") { _: Int =>
var sum = 0
for (_ <- 0L until iters) {
var i = 0
while (i < numRows) {
sum += getHiveHashCode(rows(i)).getInt(0)
i += 1
}
}
}
}

benchmark.run()
benchmark.run()
}
}

def main(args: Array[String]): Unit = {
override def runBenchmarkSuite(): Unit = {
val singleInt = new StructType().add("i", IntegerType)
/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash For single ints: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 3262 / 3267 164.6 6.1 1.0X
codegen version 6448 / 6718 83.3 12.0 0.5X
codegen version 64-bit 6088 / 6154 88.2 11.3 0.5X
codegen HiveHash version 4732 / 4745 113.5 8.8 0.7X
*/
test("single ints", singleInt, 1 << 15, 1 << 14)

val singleLong = new StructType().add("i", LongType)
/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash For single longs: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 3716 / 3726 144.5 6.9 1.0X
codegen version 7706 / 7732 69.7 14.4 0.5X
codegen version 64-bit 6370 / 6399 84.3 11.9 0.6X
codegen HiveHash version 4924 / 5026 109.0 9.2 0.8X
*/
test("single longs", singleLong, 1 << 15, 1 << 14)

val normal = new StructType()
Expand All @@ -131,45 +124,18 @@ object HashBenchmark {
.add("binary", BinaryType)
.add("date", DateType)
.add("timestamp", TimestampType)
/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash For normal: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 2985 / 3013 0.7 1423.4 1.0X
codegen version 2422 / 2434 0.9 1155.1 1.2X
codegen version 64-bit 856 / 920 2.5 408.0 3.5X
codegen HiveHash version 4501 / 4979 0.5 2146.4 0.7X
*/
test("normal", normal, 1 << 10, 1 << 11)

val arrayOfInt = ArrayType(IntegerType)
val array = new StructType()
.add("array", arrayOfInt)
.add("arrayOfArray", ArrayType(arrayOfInt))
/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash For array: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 3100 / 3555 0.0 23651.8 1.0X
codegen version 5779 / 5865 0.0 44088.4 0.5X
codegen version 64-bit 4738 / 4821 0.0 36151.7 0.7X
codegen HiveHash version 2200 / 2246 0.1 16785.9 1.4X
*/
test("array", array, 1 << 8, 1 << 9)

val mapOfInt = MapType(IntegerType, IntegerType)
val map = new StructType()
.add("map", mapOfInt)
.add("mapOfMap", MapType(IntegerType, mapOfInt))
/*
Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
Hash For map: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
interpreted version 0 / 0 48.1 20.8 1.0X
codegen version 257 / 275 0.0 62768.7 0.0X
codegen version 64-bit 226 / 240 0.0 55224.5 0.0X
codegen HiveHash version 89 / 96 0.0 21708.8 0.0X
*/
test("map", map, 1 << 6, 1 << 6)
}
}