[SPARK-21871][SQL] Check actual bytecode size when compiling generated code #19083
Changes from 13 commits
**SQLConf.scala**

```diff
@@ -30,6 +30,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
 import org.apache.spark.network.util.ByteUnit
 import org.apache.spark.sql.catalyst.analysis.Resolver
+import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
 import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter

 ////////////////////////////////////////////////////////////////////////////////////////////////////
```
```diff
@@ -575,15 +576,15 @@ object SQLConf
       "disable logging or -1 to apply no limit.")
     .createWithDefault(1000)

-  val WHOLESTAGE_MAX_LINES_PER_FUNCTION = buildConf("spark.sql.codegen.maxLinesPerFunction")
+  val WHOLESTAGE_HUGE_METHOD_LIMIT = buildConf("spark.sql.codegen.hugeMethodLimit")
     .internal()
-    .doc("The maximum lines of a single Java function generated by whole-stage codegen. " +
-      "When the generated function exceeds this threshold, " +
+    .doc("The maximum bytecode size of a single compiled Java function generated by whole-stage " +
+      "codegen. When the compiled function exceeds this threshold, " +
       "the whole-stage codegen is deactivated for this subtree of the current query plan. " +
-      "The default value 4000 is the max length of byte code JIT supported " +
-      "for a single function(8000) divided by 2.")
+      s"The default value is ${CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT} and " +
+      "this is a limit in the OpenJDK JVM implementation.")
     .intConf
-    .createWithDefault(4000)
+    .createWithDefault(CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT)

   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
     .doc("The maximum number of bytes to pack into a single partition when reading files.")
```
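The new limit is an internal config, so it normally stays at its default, but it can still be overridden per session. A minimal usage sketch, not part of this diff (the value 16000 is an arbitrary example):

```scala
// Assumes an existing SparkSession named `spark` on a build that contains this patch.
// 16000 is a made-up value; the default is CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT.
spark.conf.set("spark.sql.codegen.hugeMethodLimit", 16000)

// The same override expressed as a SQL command:
spark.sql("SET spark.sql.codegen.hugeMethodLimit=16000")
```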
```diff
@@ -1058,7 +1059,7 @@ class SQLConf extends Serializable with Logging {
   def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)

-  def maxLinesPerFunction: Int = getConf(WHOLESTAGE_MAX_LINES_PER_FUNCTION)
+  def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)

   def tableRelationCacheSize: Int =
     getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
```
**WholeStageCodegenExec.scala**

```diff
@@ -380,23 +380,26 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
   override def doExecute(): RDD[InternalRow] = {
     val (ctx, cleanedSource) = doCodeGen()
-    if (ctx.isTooLongGeneratedFunction) {
-      logWarning("Found too long generated codes and JIT optimization might not work, " +
-        "Whole-stage codegen disabled for this plan, " +
-        "You can change the config spark.sql.codegen.MaxFunctionLength " +
-        "to adjust the function length limit:\n "
-        + s"$treeString")
-      return child.execute()
-    }
     // try to compile and fallback if it failed
-    try {
+    val (_, maxCodeSize) = try {
       CodeGenerator.compile(cleanedSource)
     } catch {
       case _: Exception if !Utils.isTesting && sqlContext.conf.codegenFallback =>
         // We should already saw the error message
         logWarning(s"Whole-stage codegen disabled for this plan:\n $treeString")
         return child.execute()
     }

+    // Check if compiled code has a too large function
+    if (maxCodeSize > sqlContext.conf.hugeMethodLimit) {
+      logWarning(s"Found too long generated codes and JIT optimization might not work: " +
+        s"the bytecode size was $maxCodeSize, this value went over the limit " +
+        s"${sqlContext.conf.hugeMethodLimit}, and the whole-stage codegen was disabled " +
+        s"for this plan. To avoid this, you can set the limit " +
+        s"${SQLConf.WHOLESTAGE_HUGE_METHOD_LIMIT.key} higher:\n$treeString")
+      return child.execute()
+    }
+
     val references = ctx.references.toArray

     val durationMs = longMetric("pipelineTime")
```
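One way to see the new fallback path is to force an unrealistically low limit so that every compiled function exceeds it. A rough sketch, assuming a spark-shell or test session running a build with this patch (the query itself is arbitrary):

```scala
// Hypothetical repro: with a limit of 1, any compiled method is over the threshold,
// so doExecute() should log the warning above and fall back to child.execute()
// instead of running the whole-stage-codegen path.
spark.conf.set("spark.sql.codegen.hugeMethodLimit", 1)
spark.range(100).selectExpr("id % 7 AS k").groupBy("k").count().collect()
```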
```diff
@@ -405,7 +408,7 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
     assert(rdds.size <= 2, "Up to two input RDDs can be supported")
     if (rdds.length == 1) {
       rdds.head.mapPartitionsWithIndex { (index, iter) =>
-        val clazz = CodeGenerator.compile(cleanedSource)
+        val (clazz, _) = CodeGenerator.compile(cleanedSource)
         val buffer = clazz.generate(references).asInstanceOf[BufferedRowIterator]
         buffer.init(index, Array(iter))
         new Iterator[InternalRow] {
```
```diff
@@ -424,7 +427,7 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
         // a small hack to obtain the correct partition index
       }.mapPartitionsWithIndex { (index, zippedIter) =>
         val (leftIter, rightIter) = zippedIter.next()
-        val clazz = CodeGenerator.compile(cleanedSource)
+        val (clazz, _) = CodeGenerator.compile(cleanedSource)
         val buffer = clazz.generate(references).asInstanceOf[BufferedRowIterator]
         buffer.init(index, Array(leftIter, rightIter))
         new Iterator[InternalRow] {
```
Review comment: Please add `@return` to explain what is returned.

Reply: ok
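For reference, the kind of `@return` documentation the reviewer is asking for might look like the sketch below. The wording and the simplified signature are illustrative only; the real method lives in `CodeGenerator` and takes the generated source in Spark's own wrapper type rather than a plain `String`.

```scala
object CompileDocSketch {
  /**
   * Compiles the given generated Java source.
   *
   * @return a pair of the compiled class and the maximum bytecode size of a single
   *         method in that class; callers such as WholeStageCodegenExec compare the
   *         size against spark.sql.codegen.hugeMethodLimit to decide whether to fall
   *         back from whole-stage codegen.
   */
  def compile(source: String): (Class[_], Int) = {
    // Placeholder body: this sketch only illustrates the @return documentation.
    throw new UnsupportedOperationException("documentation sketch only")
  }
}
```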