From 94386550523baf5f98427d3ef0b9f9815cee4c69 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Tue, 22 Aug 2017 18:47:19 +0900 Subject: [PATCH 1/2] Use -1 to disable maxLinesPerFunction --- .../expressions/codegen/CodeGenerator.scala | 16 ++++++++++------ .../apache/spark/sql/internal/SQLConf.scala | 6 ++++-- .../sql/execution/WholeStageCodegenSuite.scala | 18 ++++++++++++++---- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 807765c1e00a..48a5fb14707c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -357,15 +357,19 @@ class CodegenContext { /** * It will count the lines of every Java function generated by whole-stage codegen, - * if there is a function of length greater than spark.sql.codegen.maxLinesPerFunction, - * it will return true. + * if there is a function of length greater than `maxLinesPerFunction`, it will return true. + * If `maxLinesPerFunction` has -1, it will always return false. */ def isTooLongGeneratedFunction: Boolean = { - classFunctions.values.exists { _.values.exists { - code => - val codeWithoutComments = CodeFormatter.stripExtraNewLinesAndComments(code) - codeWithoutComments.count(_ == '\n') > SQLConf.get.maxLinesPerFunction + val maxLinesPerFunc = SQLConf.get.maxLinesPerFunction + if (maxLinesPerFunc >= 0) { + classFunctions.values.exists { _.values.exists { code => + val codeWithoutComments = CodeFormatter.stripExtraNewLinesAndComments(code) + codeWithoutComments.count(_ == '\n') > maxLinesPerFunc + } } + } else { + false } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index a685099505ee..ea684296b8b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -577,9 +577,11 @@ object SQLConf { .doc("The maximum lines of a single Java function generated by whole-stage codegen. " + "When the generated function exceeds this threshold, " + "the whole-stage codegen is deactivated for this subtree of the current query plan. " + - "The default value 4000 is the max length of byte code JIT supported " + - "for a single function(8000) divided by 2.") + "The default value 2667 is the max length of byte code JIT supported " + + "for a single function(8000) divided by 2. Use -1 to disable this.") .intConf + .checkValue(maxLines => maxLines >= -1, "The maximum must not be a negative integer, -1 to " + + "always activate whole-stage codegen.") .createWithDefault(4000) val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index beeee6a97c8d..9aabedd1bbed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -17,9 +17,7 @@ package org.apache.spark.sql.execution -import org.apache.spark.sql.{Column, Dataset, Row} -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute -import org.apache.spark.sql.catalyst.expressions.{Add, Literal, Stack} +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.execution.aggregate.HashAggregateExec import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec @@ -193,11 +191,15 @@ class WholeStageCodegenSuite extends SparkPlanTest with SharedSQLContext { } } - test("SPARK-21603 check there is not a too long generated function when threshold is Int.Max") { + test("SPARK-21603 check there is not a too long generated function when threshold is Max/-1") { withSQLConf(SQLConf.WHOLESTAGE_MAX_LINES_PER_FUNCTION.key -> Int.MaxValue.toString) { val ctx = genGroupByCodeGenContext(30) assert(ctx.isTooLongGeneratedFunction === false) } + withSQLConf(SQLConf.WHOLESTAGE_MAX_LINES_PER_FUNCTION.key -> "-1") { + val ctx = genGroupByCodeGenContext(30) + assert(ctx.isTooLongGeneratedFunction === false) + } } test("SPARK-21603 check there is a too long generated function when threshold is 0") { @@ -206,4 +208,12 @@ class WholeStageCodegenSuite extends SparkPlanTest with SharedSQLContext { assert(ctx.isTooLongGeneratedFunction === true) } } + + test("SPARK-21603 `maxLinesPerFunction` must not be negative") { + val errMsg = intercept[IllegalArgumentException] { + withSQLConf(SQLConf.WHOLESTAGE_MAX_LINES_PER_FUNCTION.key -> "-2") {} + }.getMessage + assert(errMsg.contains("The maximum must not be a negative integer, " + + "-1 to always activate whole-stage codegen.")) + } } From a0854ad16003020eaa6f0d1f1c08db726b9196e2 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 24 Aug 2017 12:37:44 +0900 Subject: [PATCH 2/2] Fix --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 6 +++--- .../apache/spark/sql/execution/WholeStageCodegenSuite.scala | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ea684296b8b2..4c3ae81abe3f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -577,11 +577,11 @@ object SQLConf { .doc("The maximum lines of a single Java function generated by whole-stage codegen. " + "When the generated function exceeds this threshold, " + "the whole-stage codegen is deactivated for this subtree of the current query plan. " + - "The default value 2667 is the max length of byte code JIT supported " + + "The default value 4000 is the max length of byte code JIT supported " + "for a single function(8000) divided by 2. Use -1 to disable this.") .intConf - .checkValue(maxLines => maxLines >= -1, "The maximum must not be a negative integer, -1 to " + - "always activate whole-stage codegen.") + .checkValue(maxLines => maxLines >= -1, "The maximum must not be a negative integer, " + + "except for -1 using to always activate whole-stage codegen") .createWithDefault(4000) val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index 9aabedd1bbed..30ed836967b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -213,7 +213,7 @@ class WholeStageCodegenSuite extends SparkPlanTest with SharedSQLContext { val errMsg = intercept[IllegalArgumentException] { withSQLConf(SQLConf.WHOLESTAGE_MAX_LINES_PER_FUNCTION.key -> "-2") {} }.getMessage - assert(errMsg.contains("The maximum must not be a negative integer, " + - "-1 to always activate whole-stage codegen.")) + assert(errMsg.contains("The maximum must not be a negative integer, except for -1 using to " + + "always activate whole-stage codegen")) } }