apache · MaxGekk · Oct 21, 2019 · Oct 21, 2019 · Oct 21, 2019 · Oct 21, 2019
diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt
@@ -0,0 +1,25 @@
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.15
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+cast strings to intervals:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+string w/ interval                                  386            428          48          2.6         386.4       1.0X
+string w/o interval                                 312            336          33          3.2         312.3       1.2X
+1 units w/ interval                                 933            957          38          1.1         933.0       0.4X
+1 units w/o interval                                919            948          35          1.1         918.8       0.4X
+2 units w/ interval                                1080           1103          23          0.9        1080.5       0.4X
+2 units w/o interval                               1111           1119           8          0.9        1111.5       0.3X
+3 units w/ interval                                1226           1231           5          0.8        1225.7       0.3X
+3 units w/o interval                               1280           1288           9          0.8        1280.3       0.3X
+4 units w/ interval                                1418           1433          13          0.7        1417.7       0.3X
+4 units w/o interval                               1479           1484           8          0.7        1478.7       0.3X
+5 units w/ interval                                1709           1730          18          0.6        1709.3       0.2X
+5 units w/o interval                               1729           1739          10          0.6        1729.1       0.2X
+6 units w/ interval                                1820           1831          10          0.5        1819.9       0.2X
+6 units w/o interval                               1936           1945           9          0.5        1936.2       0.2X
+7 units w/ interval                                2048           2061          11          0.5        2048.2       0.2X
+7 units w/o interval                               2050           2086          31          0.5        2049.8       0.2X
+8 units w/ interval                                2306           2341          30          0.4        2306.4       0.2X
+8 units w/o interval                               2393           2436          55          0.4        2393.3       0.2X
+9 units w/ interval                                2480           2515          39          0.4        2480.1       0.2X
+9 units w/o interval                               2518           2521           5          0.4        2517.8       0.2X
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import scala.collection.mutable.ListBuffer
+
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.sql.SaveMode.Overwrite
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Synthetic benchmark for interval functions.
+ * To run this benchmark:
+ * {{{
+ *   1. without sbt:
+ *      bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
+ *   2. build/sbt "sql/test:runMain <this class>"
+ *   3. generate result:
+ *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
+ *      Results will be written to "benchmarks/IntervalBenchmark-results.txt".
+ * }}}
+ */
+object IntervalBenchmark extends SqlBasedBenchmark {
+
+  /** Evaluates `exprs` over `cardinality` ids with whole-stage codegen on, saving as "noop". */
+  private def doBenchmark(cardinality: Long, exprs: String*): Unit = {
+    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
+      val ids = spark.range(0, cardinality, 1, 1)
+      ids.selectExpr(exprs: _*).write.format("noop").mode(Overwrite).save()
+    }
+  }
+
+  /** Registers a benchmark case `name` that evaluates `exprs`, using 3 iterations. */
+  private def addCase(
+      benchmark: Benchmark,
+      cardinality: Long,
+      name: String,
+      exprs: String*): Unit = {
+    benchmark.addCase(name, numIters = 3) { _ =>
+      doBenchmark(cardinality, exprs: _*)
+    }
+  }
+
+  /**
+   * Builds a `concat_ws` SQL expression: an 'interval' prefix (or ''), `id % 10000`, 'years'
+   * and, for non-empty `units` only, the units joined by spaces (no stray '' after 'years').
+   */
+  private def buildString(withPrefix: Boolean, units: Seq[String] = Seq.empty): String = {
+    val otherUnits = if (units.nonEmpty) s", '${units.mkString(" ")}'" else ""
+    val prefix = if (withPrefix) "'interval'" else "''"
+    s"concat_ws(' ', $prefix, cast(id % 10000 AS string), 'years'$otherUnits)"
+  }
+
+  /** Adds two cast-to-interval cases (with/without the prefix) for years plus `units`. */
+  private def addCase(benchmark: Benchmark, cardinality: Long, units: Seq[String]): Unit = {
+    Seq(true, false).foreach { withPrefix =>
+      val expr = s"CAST(${buildString(withPrefix, units)} AS interval)"
+      val note = if (withPrefix) "w/ interval" else "w/o interval"
+      addCase(benchmark, cardinality, s"${units.length + 1} units $note", expr)
+    }
+  }
+
+  /** Measures building the strings alone, then casting them to intervals with 1 to 9 units. */
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    val N = 1000000
+    val timeUnits = Seq(
+      "13 months", "100 weeks", "9 days", "12 hours",
+      "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds")
+    val intervalToTest = ListBuffer[String]()
+
+    val benchmark = new Benchmark("cast strings to intervals", N, output = output)
+    addCase(benchmark, N, "string w/ interval", buildString(true, timeUnits))
+    addCase(benchmark, N, "string w/o interval", buildString(false, timeUnits))
+    addCase(benchmark, N, intervalToTest.toList) // Only years
+
+    for (unit <- timeUnits) {
+      intervalToTest.append(unit)
+      addCase(benchmark, N, intervalToTest.toList) // immutable snapshot of the units so far
+    }
+
+    benchmark.run()
+  }
+}