Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions sql/core/benchmarks/IntervalBenchmark-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.15
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
string w/ interval 386 428 48 2.6 386.4 1.0X
string w/o interval 312 336 33 3.2 312.3 1.2X
1 units w/ interval 933 957 38 1.1 933.0 0.4X
1 units w/o interval 919 948 35 1.1 918.8 0.4X
2 units w/ interval 1080 1103 23 0.9 1080.5 0.4X
2 units w/o interval 1111 1119 8 0.9 1111.5 0.3X
3 units w/ interval 1226 1231 5 0.8 1225.7 0.3X
3 units w/o interval 1280 1288 9 0.8 1280.3 0.3X
4 units w/ interval 1418 1433 13 0.7 1417.7 0.3X
4 units w/o interval 1479 1484 8 0.7 1478.7 0.3X
5 units w/ interval 1709 1730 18 0.6 1709.3 0.2X
5 units w/o interval 1729 1739 10 0.6 1729.1 0.2X
6 units w/ interval 1820 1831 10 0.5 1819.9 0.2X
6 units w/o interval 1936 1945 9 0.5 1936.2 0.2X
7 units w/ interval 2048 2061 11 0.5 2048.2 0.2X
7 units w/o interval 2050 2086 31 0.5 2049.8 0.2X
8 units w/ interval 2306 2341 30 0.4 2306.4 0.2X
8 units w/o interval 2393 2436 55 0.4 2393.3 0.2X
9 units w/ interval 2480 2515 39 0.4 2480.1 0.2X
9 units w/o interval 2518 2521 5 0.4 2517.8 0.2X

Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.benchmark

import scala.collection.mutable.ListBuffer

import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.SaveMode.Overwrite
import org.apache.spark.sql.internal.SQLConf

/**
* Synthetic benchmark for interval functions.
* To run this benchmark:
* {{{
* 1. without sbt:
* bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
* 2. build/sbt "sql/test:runMain <this class>"
* 3. generate result:
* SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
* Results will be written to "benchmarks/IntervalBenchmark-results.txt".
* }}}
*/
object IntervalBenchmark extends SqlBasedBenchmark {

/**
 * Evaluates the given SQL expressions over a range of ids and writes the
 * result with the "noop" format, with whole-stage codegen switched on.
 *
 * @param cardinality exclusive upper bound of the id range, i.e. the number of rows
 * @param exprs SQL expressions passed to `selectExpr`
 */
private def doBenchmark(cardinality: Long, exprs: String*): Unit = {
  val codegenEnabled = SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true"
  withSQLConf(codegenEnabled) {
    // Ids 0 until cardinality, step 1, requested as a single partition.
    val ids = spark.range(0, cardinality, 1, 1)
    val projected = ids.selectExpr(exprs: _*)
    // NOTE(review): the "noop" sink presumably discards rows so that only
    // expression evaluation is timed - confirm against the data source docs.
    projected.write
      .format("noop")
      .mode(Overwrite)
      .save()
  }
}

/**
 * Registers a named benchmark case that evaluates `exprs` over `cardinality`
 * rows, asking the benchmark for 3 iterations.
 *
 * @param benchmark the benchmark to register the case with
 * @param cardinality number of rows handed to `doBenchmark`
 * @param name the case name passed to the benchmark
 * @param exprs SQL expressions to evaluate in the case
 */
private def addCase(
    benchmark: Benchmark,
    cardinality: Long,
    name: String,
    exprs: String*): Unit = {
  // The iteration argument supplied by the benchmark is not needed here.
  benchmark.addCase(name, numIters = 3)(_ => doBenchmark(cardinality, exprs: _*))
}

/**
 * Builds a SQL `concat_ws` expression producing an interval-like string per row:
 * an optional 'interval' prefix, the value `id % 10000` cast to string, the
 * literal 'years', and, when `units` is non-empty, one extra literal made of all
 * `units` joined by single spaces.
 *
 * @param withPrefix when true the first argument is 'interval', otherwise an
 *                   empty string literal
 * @param units additional "<number> <unit>" fragments appended after 'years'
 * @return the SQL expression text
 */
private def buildString(withPrefix: Boolean, units: Seq[String] = Seq.empty): String = {
// Emit the leading ", " only when there are extra units to append.
val sep = if (units.length > 0) ", " else ""
// With no units this is the empty literal '' with no separator before it, so it is
// concatenated directly after 'years' in the generated SQL text.
val otherUnits = s"$sep'${units.mkString(" ")}'"
val prefix = if (withPrefix) "'interval'" else "''"
// concat_ws joins its arguments with a single space; id is the column produced by range().
s"concat_ws(' ', ${prefix}, cast(id % 10000 AS string), 'years'${otherUnits})"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, why do you use string instead of Scala API functions? I personally find it's better to use them in such cases.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should construct the string manually and only benchmark string literal to interval. Otherwise the benchmark result might be affected by the concat_ws function.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The overhead of preparing the benchmark input is non-zero in most cases. That is why I always measure the input preparation separately; see the first lines in the results: https://github.com/apache/spark/pull/26189/files#diff-586487fac2b9b1303aaf80adf8fa37abR5-R6 . That way, we can subtract the preparation time from the other numbers.

I think we should construct the string manually and only benchmark string literal to interval.

Could you please explain what you mean by "manually", and how this would make the preparation overhead insignificant?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK I see that id column is used to construct the interval string, so we must use concat_ws function.

Agree with @HyukjinKwon that it's more readable to use dataframe Column API instead of bare SQL string.

}

/**
 * Registers two cases for the given units, one with the 'interval' prefix and
 * one without, each casting the built string to an interval.
 *
 * The case name counts `units.length + 1` units because `buildString` always
 * includes 'years' in addition to `units`.
 *
 * @param benchmark the benchmark to register the cases with
 * @param cardinality number of rows handed to `doBenchmark`
 * @param units extra "<number> <unit>" fragments appended after the years part
 */
private def addCase(benchmark: Benchmark, cardinality: Long, units: Seq[String]): Unit = {
  val unitCount = units.length + 1
  for (withPrefix <- Seq(true, false)) {
    val castExpr = s"CAST(${buildString(withPrefix, units)} AS interval)"
    val suffix = if (withPrefix) "w/ interval" else "w/o interval"
    val caseName = s"$unitCount units $suffix"
    benchmark.addCase(caseName, numIters = 3)(_ => doBenchmark(cardinality, castExpr))
  }
}

/**
 * Registers and runs the "cast strings to intervals" benchmark.
 *
 * The first two cases only build the interval string (no CAST). The remaining
 * cases cast strings with 1 to 9 units to intervals, each with and without the
 * 'interval' prefix.
 *
 * @param mainArgs command-line arguments; not used by this suite
 */
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
  val N = 1000000
  val timeUnits = Seq(
    "13 months", "100 weeks", "9 days", "12 hours",
    "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds")
  val intervalToTest = ListBuffer[String]()

  val benchmark = new Benchmark("cast strings to intervals", N, output = output)
  // Baselines: evaluate only the string-building expression, without the cast.
  addCase(benchmark, N, "string w/ interval", buildString(withPrefix = true, timeUnits))
  addCase(benchmark, N, "string w/o interval", buildString(withPrefix = false, timeUnits))
  // Pass an immutable snapshot rather than the buffer itself: the buffer is mutated
  // below, and a mutable buffer is not a `Seq` under Scala 2.13.
  addCase(benchmark, N, intervalToTest.toList) // Only years

  for (unit <- timeUnits) {
    intervalToTest.append(unit)
    addCase(benchmark, N, intervalToTest.toList)
  }

  benchmark.run()
}
}