diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt
index 07c794fe0c75d..89d3616d2c205 100644
--- a/sql/core/benchmarks/ExtractBenchmark-results.txt
+++ b/sql/core/benchmarks/ExtractBenchmark-results.txt
@@ -98,3 +98,22 @@ MILLISECONDS of date                                 1744           1749
 MICROSECONDS of date                                 1592           1594           1          6.3         159.2       0.6X
 EPOCH of date                                        2368           2371           3          4.2         236.8       0.4X
 
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.15
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Invoke date_part for interval:            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+cast to interval                                   1365           1395          31          7.3         136.5       1.0X
+MILLENNIUM of interval                             1620           1651          27          6.2         162.0       0.8X
+CENTURY of interval                                1469           1487          22          6.8         146.9       0.9X
+DECADE of interval                                 1462           1473          17          6.8         146.2       0.9X
+YEAR of interval                                   1438           1447           8          7.0         143.8       0.9X
+QUARTER of interval                                1456           1458           3          6.9         145.6       0.9X
+MONTH of interval                                  1440           1452          16          6.9         144.0       0.9X
+DAY of interval                                    1478           1485           6          6.8         147.8       0.9X
+HOUR of interval                                   1579           1580           3          6.3         157.9       0.9X
+MINUTE of interval                                 1598           1605          11          6.3         159.8       0.9X
+SECOND of interval                                 1571           1579          10          6.4         157.1       0.9X
+MILLISECONDS of interval                           1570           1577           6          6.4         157.0       0.9X
+MICROSECONDS of interval                           1484           1488           5          6.7         148.4       0.9X
+EPOCH of interval                                  1521           1522           1          6.6         152.1       0.9X
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala
index 2bd73c1dc14f8..941649df6b727 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala
@@ -61,8 +61,10 @@ object ExtractBenchmark extends SqlBasedBenchmark {
   }
 
   private def castExpr(from: String): String = from match {
-    case "timestamp" => s"cast(id as timestamp)"
-    case "date" => s"cast(cast(id as timestamp) as date)"
+    case "timestamp" => "cast(id as timestamp)"
+    case "date" => "cast(cast(id as timestamp) as date)"
+    case "interval" => "(cast(cast(id as timestamp) as date) - date'0001-01-01') + " +
+      "(cast(id as timestamp) - timestamp'1000-01-01 01:02:03.123456')"
     case other => throw new IllegalArgumentException(
-      s"Unsupported column type $other. Valid column types are 'timestamp' and 'date'")
+      s"Unsupported column type $other. Valid column types are 'timestamp', 'date' and 'interval'")
   }
@@ -74,8 +76,8 @@ object ExtractBenchmark extends SqlBasedBenchmark {
       field: String,
       from: String): Unit = {
     val expr = func match {
-      case "extract" => s"EXTRACT($field FROM ${castExpr(from)})"
-      case "date_part" => s"DATE_PART('$field', ${castExpr(from)})"
+      case "extract" => s"EXTRACT($field FROM ${castExpr(from)}) AS $field"
+      case "date_part" => s"DATE_PART('$field', ${castExpr(from)}) AS $field"
       case other => throw new IllegalArgumentException(
         s"Unsupported function '$other'. Valid functions are 'extract' and 'date_part'.")
     }
@@ -84,24 +86,38 @@ object ExtractBenchmark extends SqlBasedBenchmark {
     }
   }
 
+  // Per-input-type settings: the fields and functions to benchmark, and the iterNum to use.
+  private case class Settings(fields: Seq[String], func: Seq[String], iterNum: Long)
+
   override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
     val N = 10000000L
-    val fields = Seq(
+    val datetimeFields = Seq(
       "MILLENNIUM", "CENTURY", "DECADE", "YEAR",
       "ISOYEAR", "QUARTER", "MONTH", "WEEK",
       "DAY", "DAYOFWEEK", "DOW", "ISODOW",
       "DOY", "HOUR", "MINUTE", "SECOND",
       "MILLISECONDS", "MICROSECONDS", "EPOCH")
+    val intervalFields = Seq(
+      "MILLENNIUM", "CENTURY", "DECADE", "YEAR",
+      "QUARTER", "MONTH", "DAY",
+      "HOUR", "MINUTE", "SECOND",
+      "MILLISECONDS", "MICROSECONDS", "EPOCH")
+    val settings = Map(
+      "timestamp" -> Settings(datetimeFields, Seq("extract", "date_part"), N),
+      "date" -> Settings(datetimeFields, Seq("extract", "date_part"), N),
+      "interval" -> Settings(intervalFields, Seq("date_part"), N))
+
+    for {
+      (dataType, Settings(fields, funcs, iterNum)) <- settings
+      func <- funcs} {
 
-    Seq("extract", "date_part").foreach { func =>
-      Seq("timestamp", "date").foreach { dateType =>
-        val benchmark = new Benchmark(s"Invoke $func for $dateType", N, output = output)
+      // Build the Benchmark with the same per-type iterNum that the run() calls below receive.
+      val benchmark = new Benchmark(s"Invoke $func for $dataType", iterNum, output = output)
 
-        run(benchmark, N, s"cast to $dateType", castExpr(dateType))
-        fields.foreach(run(benchmark, func, N, _, dateType))
+      run(benchmark, iterNum, s"cast to $dataType", castExpr(dataType))
+      fields.foreach(run(benchmark, func, iterNum, _, dataType))
 
-        benchmark.run()
-      }
+      benchmark.run()
     }
   }
 }