From 4c78ca91d0c0e096e5b0668f35a0d24457e36586 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 28 Sep 2016 13:19:34 +0900 Subject: [PATCH 1/6] Add a test to reproduce the issue. --- .../expressions/CodeGenerationSuite.scala | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 45dcfcaf2313..5588b4429164 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.expressions +import java.sql.Timestamp + import org.apache.spark.SparkFunSuite import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.Row @@ -24,7 +26,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.objects.{CreateExternalRow, GetExternalRowField, ValidateExternalType} -import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.ThreadUtils @@ -164,6 +166,23 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-17702: split wide constructor into blocks due to JVM code size limit") { + val length = 5000 + val expressions = Seq.fill(length) { + ToUTCTimestamp( + Literal.create(Timestamp.valueOf("2015-07-24 00:00:00"), TimestampType), + Literal.create("PST", StringType)) + } + val plan = 
GenerateMutableProjection.generate(expressions) + val actual = plan(new GenericMutableRow(length)).toSeq(expressions.map(_.dataType)) + val expected = Seq.fill(length)( + DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2015-07-24 07:00:00"))) + + if (!checkResult(actual, expected)) { + fail(s"Incorrect Evaluation: expressions: $expressions, actual: $actual, expected: $expected") + } + } + test("test generated safe and unsafe projection") { val schema = new StructType(Array( StructField("a", StringType, true), From 858a3ec54f78b4865c19ec46aebc9320b36f02b5 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 28 Sep 2016 13:26:38 +0900 Subject: [PATCH 2/6] Split wide constructor into blocks due to JVM code size limit. --- .../expressions/codegen/CodeGenerator.scala | 16 +++++++++++----- .../codegen/GenerateMutableProjection.scala | 3 ++- .../expressions/codegen/GenerateOrdering.scala | 3 ++- .../expressions/codegen/GeneratePredicate.scala | 4 +++- .../codegen/GenerateSafeProjection.scala | 4 +++- .../codegen/GenerateUnsafeProjection.scala | 3 ++- 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 33b9b804fc60..7f2ed44eaa0e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -163,7 +163,8 @@ class CodegenContext { def initMutableStates(): String = { // It's possible that we add same mutable state twice, e.g. the `mergeExpressions` in // `TypedAggregateExpression`, we should call `distinct` here to remove the duplicated ones. 
- mutableStates.distinct.map(_._3).mkString("\n") + val initCodes = mutableStates.distinct.map(_._3) + splitExpressions(initCodes, "init", ("Object[]", "references") :: Nil) } /** @@ -589,6 +590,11 @@ class CodegenContext { // Cannot split these expressions because they are not created from a row object. return expressions.mkString("\n") } + splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil) + } + + private def splitExpressions( + expressions: Seq[String], funcName: String, arguments: Seq[(String, String)]): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() for (code <- expressions) { @@ -608,11 +614,11 @@ class CodegenContext { // inline execution if only one block blocks.head } else { - val apply = freshName("apply") + val func = freshName(funcName) val functions = blocks.zipWithIndex.map { case (body, i) => - val name = s"${apply}_$i" + val name = s"${func}_$i" val code = s""" - |private void $name(InternalRow $row) { + |private void $name(${arguments.map { case (t, name) => s"$t $name" }.mkString(", ")}) { | $body |} """.stripMargin @@ -620,7 +626,7 @@ class CodegenContext { name } - functions.map(name => s"$name($row);").mkString("\n") + functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")});").mkString("\n") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 0f82d2e613c7..c8764a1a272f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -93,6 +93,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP val allProjections = ctx.splitExpressions(ctx.INPUT_ROW, projectionCodes) val 
allUpdates = ctx.splitExpressions(ctx.INPUT_ROW, updates) + val allInitializations = ctx.initMutableStates() val codeBody = s""" public java.lang.Object generate(Object[] references) { @@ -109,7 +110,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP public SpecificMutableProjection(Object[] references) { this.references = references; mutableRow = new $genericMutableRowType(${expressions.size}); - ${ctx.initMutableStates()} + $allInitializations } public ${classOf[BaseMutableProjection].getName} target(MutableRow row) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index f1c30ef6c7fb..5211d74c84bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -124,6 +124,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR protected def create(ordering: Seq[SortOrder]): BaseOrdering = { val ctx = newCodeGenContext() val comparisons = genComparisons(ctx, ordering) + val allInitializations = ctx.initMutableStates() val codeBody = s""" public SpecificOrdering generate(Object[] references) { return new SpecificOrdering(references); @@ -137,7 +138,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR public SpecificOrdering(Object[] references) { this.references = references; - ${ctx.initMutableStates()} + $allInitializations } public int compare(InternalRow a, InternalRow b) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index 106bb27964ca..cee23bdd0d07 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -40,6 +40,8 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool protected def create(predicate: Expression): ((InternalRow) => Boolean) = { val ctx = newCodeGenContext() val eval = predicate.genCode(ctx) + val allInitializations = ctx.initMutableStates() + val codeBody = s""" public SpecificPredicate generate(Object[] references) { return new SpecificPredicate(references); @@ -52,7 +54,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool public SpecificPredicate(Object[] references) { this.references = references; - ${ctx.initMutableStates()} + $allInitializations } public boolean eval(InternalRow ${ctx.INPUT_ROW}) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index b891f9467375..447e4be4f17d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -155,6 +155,8 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] """ } val allExpressions = ctx.splitExpressions(ctx.INPUT_ROW, expressionCodes) + val allInitializations = ctx.initMutableStates() + val codeBody = s""" public java.lang.Object generate(Object[] references) { return new SpecificSafeProjection(references); @@ -170,7 +172,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] public SpecificSafeProjection(Object[] references) { this.references = references; mutableRow = (MutableRow) references[references.length - 
1]; - ${ctx.initMutableStates()} + $allInitializations } public java.lang.Object apply(java.lang.Object _i) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index 75bb6936b49e..9359c7a0421a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -364,6 +364,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro subexpressionEliminationEnabled: Boolean): UnsafeProjection = { val ctx = newCodeGenContext() val eval = createCode(ctx, expressions, subexpressionEliminationEnabled) + val allInitializations = ctx.initMutableStates() val codeBody = s""" public java.lang.Object generate(Object[] references) { @@ -378,7 +379,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro public SpecificUnsafeProjection(Object[] references) { this.references = references; - ${ctx.initMutableStates()} + $allInitializations } // Scala.Function1 need this From a83df5c0a4ff1035a1fa7eef5993f374605d0a3f Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 28 Sep 2016 14:58:21 +0900 Subject: [PATCH 3/6] Modify added function to use field to initialize mutable states. 
--- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 2 +- .../apache/spark/sql/execution/WholeStageCodegenExec.scala | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 7f2ed44eaa0e..fdfc28f56070 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -164,7 +164,7 @@ class CodegenContext { // It's possible that we add same mutable state twice, e.g. the `mergeExpressions` in // `TypedAggregateExpression`, we should call `distinct` here to remove the duplicated ones. val initCodes = mutableStates.distinct.map(_._3) - splitExpressions(initCodes, "init", ("Object[]", "references") :: Nil) + splitExpressions(initCodes, "init", Nil) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index fb57ed7692de..62bf6f4a81ee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -316,14 +316,16 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co final class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator { private Object[] references; + private scala.collection.Iterator[] inputs; ${ctx.declareMutableStates()} public GeneratedIterator(Object[] references) { this.references = references; } - public void init(int index, scala.collection.Iterator inputs[]) { + public void init(int index, scala.collection.Iterator[] inputs) { partitionIndex = index; + this.inputs = inputs; 
${ctx.initMutableStates()} } From 0241df79433a9c06fc862019d12fe017479db1db Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 28 Sep 2016 15:04:52 +0900 Subject: [PATCH 4/6] Add `\n` to each initialization code for readability of generated code. --- .../spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index fdfc28f56070..864b4891aba3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -163,7 +163,7 @@ class CodegenContext { def initMutableStates(): String = { // It's possible that we add same mutable state twice, e.g. the `mergeExpressions` in // `TypedAggregateExpression`, we should call `distinct` here to remove the duplicated ones. - val initCodes = mutableStates.distinct.map(_._3) + val initCodes = mutableStates.distinct.map(_._3 + "\n") splitExpressions(initCodes, "init", Nil) } From 80b943570c5a9399a9b8aee3872d7c76563698f0 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 28 Sep 2016 15:14:22 +0900 Subject: [PATCH 5/6] Revert some modifications and move declareAddedFunctions after initMutableStates. 
--- .../expressions/codegen/GenerateMutableProjection.scala | 6 +++--- .../sql/catalyst/expressions/codegen/GenerateOrdering.scala | 6 +++--- .../catalyst/expressions/codegen/GeneratePredicate.scala | 6 +++--- .../expressions/codegen/GenerateSafeProjection.scala | 6 +++--- .../expressions/codegen/GenerateUnsafeProjection.scala | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index c8764a1a272f..13d61af1c9b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -93,7 +93,6 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP val allProjections = ctx.splitExpressions(ctx.INPUT_ROW, projectionCodes) val allUpdates = ctx.splitExpressions(ctx.INPUT_ROW, updates) - val allInitializations = ctx.initMutableStates() val codeBody = s""" public java.lang.Object generate(Object[] references) { @@ -105,14 +104,15 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP private Object[] references; private MutableRow mutableRow; ${ctx.declareMutableStates()} - ${ctx.declareAddedFunctions()} public SpecificMutableProjection(Object[] references) { this.references = references; mutableRow = new $genericMutableRowType(${expressions.size}); - $allInitializations + ${ctx.initMutableStates()} } + ${ctx.declareAddedFunctions()} + public ${classOf[BaseMutableProjection].getName} target(MutableRow row) { mutableRow = row; return this; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 5211d74c84bf..1cef95654a17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -124,7 +124,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR protected def create(ordering: Seq[SortOrder]): BaseOrdering = { val ctx = newCodeGenContext() val comparisons = genComparisons(ctx, ordering) - val allInitializations = ctx.initMutableStates() val codeBody = s""" public SpecificOrdering generate(Object[] references) { return new SpecificOrdering(references); @@ -134,13 +133,14 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR private Object[] references; ${ctx.declareMutableStates()} - ${ctx.declareAddedFunctions()} public SpecificOrdering(Object[] references) { this.references = references; - $allInitializations + ${ctx.initMutableStates()} } + ${ctx.declareAddedFunctions()} + public int compare(InternalRow a, InternalRow b) { InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. 
$comparisons diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index cee23bdd0d07..39aa7b17de6c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -40,7 +40,6 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool protected def create(predicate: Expression): ((InternalRow) => Boolean) = { val ctx = newCodeGenContext() val eval = predicate.genCode(ctx) - val allInitializations = ctx.initMutableStates() val codeBody = s""" public SpecificPredicate generate(Object[] references) { @@ -50,13 +49,14 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool class SpecificPredicate extends ${classOf[Predicate].getName} { private final Object[] references; ${ctx.declareMutableStates()} - ${ctx.declareAddedFunctions()} public SpecificPredicate(Object[] references) { this.references = references; - $allInitializations + ${ctx.initMutableStates()} } + ${ctx.declareAddedFunctions()} + public boolean eval(InternalRow ${ctx.INPUT_ROW}) { ${eval.code} return !${eval.isNull} && ${eval.value}; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index 447e4be4f17d..1c98c9ed1070 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -155,7 +155,6 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] """ } val 
allExpressions = ctx.splitExpressions(ctx.INPUT_ROW, expressionCodes) - val allInitializations = ctx.initMutableStates() val codeBody = s""" public java.lang.Object generate(Object[] references) { @@ -167,14 +166,15 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] private Object[] references; private MutableRow mutableRow; ${ctx.declareMutableStates()} - ${ctx.declareAddedFunctions()} public SpecificSafeProjection(Object[] references) { this.references = references; mutableRow = (MutableRow) references[references.length - 1]; - $allInitializations + ${ctx.initMutableStates()} } + ${ctx.declareAddedFunctions()} + public java.lang.Object apply(java.lang.Object _i) { InternalRow ${ctx.INPUT_ROW} = (InternalRow) _i; $allExpressions diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index 9359c7a0421a..7cc45372daa5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -364,7 +364,6 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro subexpressionEliminationEnabled: Boolean): UnsafeProjection = { val ctx = newCodeGenContext() val eval = createCode(ctx, expressions, subexpressionEliminationEnabled) - val allInitializations = ctx.initMutableStates() val codeBody = s""" public java.lang.Object generate(Object[] references) { @@ -375,13 +374,14 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro private Object[] references; ${ctx.declareMutableStates()} - ${ctx.declareAddedFunctions()} public SpecificUnsafeProjection(Object[] references) { this.references = references; - $allInitializations + 
${ctx.initMutableStates()} } + ${ctx.declareAddedFunctions()} + // Scala.Function1 need this public java.lang.Object apply(java.lang.Object row) { return apply((InternalRow) row); From 3c4b765bc39784de9a72219af4a29d03bf24531f Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 29 Sep 2016 16:42:18 +0900 Subject: [PATCH 6/6] Add inline comment. --- .../spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 864b4891aba3..5d485d13a3eb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -164,6 +164,8 @@ class CodegenContext { // It's possible that we add same mutable state twice, e.g. the `mergeExpressions` in // `TypedAggregateExpression`, we should call `distinct` here to remove the duplicated ones. val initCodes = mutableStates.distinct.map(_._3 + "\n") + // The generated initialization code may exceed the JVM 64KB method size limit if there are too + // many mutable states, so split it into multiple functions. splitExpressions(initCodes, "init", Nil) }