From 686a937d41bf2a33f73d8defceb94ff654c5c751 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sat, 14 Sep 2019 11:35:31 +0900 Subject: [PATCH 1/3] Fix --- .../scala/org/apache/spark/sql/BenchmarkQueryTest.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala index 3fcb9892800b..6668bffa269e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala @@ -57,7 +57,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { } codegenSubtrees.toSeq.foreach { subtree => val code = subtree.doCodeGen()._2 - try { + val (_, maxMethodCodeSize) = try { // Just check the generated code can be properly compiled CodeGenerator.compile(code) } catch { @@ -72,6 +72,10 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { """.stripMargin throw new Exception(msg, e) } + + assert(maxMethodCodeSize <= CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT, + s"too long generated codes found in the WholeStageCodegenExec subtree (id=${subtree.id}) " + + s"and JIT optimization might not work:\n${subtree.treeString}") } } } From 9f329992a7baec1ae04033b2e75b47f7210fb94f Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sat, 21 Sep 2019 09:48:10 +0900 Subject: [PATCH 2/3] Fix --- .../scala/org/apache/spark/sql/BenchmarkQueryTest.scala | 9 +++++---- .../scala/org/apache/spark/sql/TPCDSQuerySuite.scala | 8 ++++++-- .../sql/execution/LogicalPlanTagInSparkPlanSuite.scala | 5 +++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala index 6668bffa269e..9bb741bee4e4 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import org.apache.spark.internal.config.Tests.IS_TESTING -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator} +import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeFormatter, CodeGenerator} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec} import org.apache.spark.sql.test.SharedSparkSession @@ -48,7 +48,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { RuleExecutor.resetMetrics() } - protected def checkGeneratedCode(plan: SparkPlan): Unit = { + protected def checkGeneratedCode(plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = { val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() plan foreach { case s: WholeStageCodegenExec => @@ -57,7 +57,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { } codegenSubtrees.toSeq.foreach { subtree => val code = subtree.doCodeGen()._2 - val (_, maxMethodCodeSize) = try { + val (_, ByteCodeStats(maxMethodCodeSize, _, _)) = try { // Just check the generated code can be properly compiled CodeGenerator.compile(code) } catch { @@ -73,7 +73,8 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { throw new Exception(msg, e) } - assert(maxMethodCodeSize <= CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT, + assert(!checkMethodCodeSize || + maxMethodCodeSize <= CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT, s"too long generated codes found in the WholeStageCodegenExec subtree (id=${subtree.id}) " + s"and JIT optimization might not work:\n${subtree.treeString}") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index a668434a68af..01d504a30e48 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -82,13 +82,17 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema { "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59", "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max") + // List up the known queries having too large code in a generated function + val blackListForMethodCodeSizeCheck = Set("modified-q3") + modifiedTPCDSQueries.foreach { name => val queryString = resourceToString(s"tpcds-modifiedQueries/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) - test(s"modified-$name") { + val testName = s"modified-$name" + test(testName) { // check the plans can be properly generated val plan = sql(queryString).queryExecution.executedPlan - checkGeneratedCode(plan) + checkGeneratedCode(plan, !blackListForMethodCodeSizeCheck.contains(testName)) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala index aa83b9b11dcf..44af422b9083 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala @@ -32,8 +32,9 @@ import org.apache.spark.sql.execution.window.WindowExec class LogicalPlanTagInSparkPlanSuite extends TPCDSQuerySuite { - override protected def checkGeneratedCode(plan: SparkPlan): Unit = { - super.checkGeneratedCode(plan) + override protected def checkGeneratedCode( + plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = { + super.checkGeneratedCode(plan, checkMethodCodeSize) checkLogicalPlanTag(plan) } From 
93448be1bbc2ab6a0d4cd36af59cea724cdaac98 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sun, 22 Sep 2019 09:25:33 +0900 Subject: [PATCH 3/3] Fix --- .../src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index 01d504a30e48..11e66e878c14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -82,7 +82,9 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema { "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59", "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max") - // List up the known queries having too large code in a generated function + // Known queries whose generated code in a single function is too large. + // The JIRA ticket tracking `modified-q3` is: + // [SPARK-29128] Split predicate code in OR expressions val blackListForMethodCodeSizeCheck = Set("modified-q3") modifiedTPCDSQueries.foreach { name =>