From 640c9cd3b99d51f38c9b1a0c3f94bae676d11e51 Mon Sep 17 00:00:00 2001 From: Kris Mok Date: Tue, 14 Aug 2018 03:50:01 -0700 Subject: [PATCH] SPARK-25113: Add logging to CodeGenerator when any generated method's bytecode size goes above HugeMethodLimit --- .../expressions/codegen/CodeGenerator.scala | 11 +++- .../expressions/CodeGenerationSuite.scala | 64 +++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 4b30de5aeb7a..e2d74daca56c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1258,7 +1258,8 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin object CodeGenerator extends Logging { - // This is the value of HugeMethodLimit in the OpenJDK JVM settings + // This is the default value of HugeMethodLimit in the OpenJDK HotSpot JVM, + // beyond which methods will be rejected from JIT compilation final val DEFAULT_JVM_HUGE_METHOD_LIMIT = 8000 // The max valid length of method parameters in JVM. @@ -1385,9 +1386,15 @@ object CodeGenerator extends Logging { try { val cf = new ClassFile(new ByteArrayInputStream(classBytes)) val stats = cf.methodInfos.asScala.flatMap { method => - method.getAttributes().filter(_.getClass.getName == codeAttr.getName).map { a => + method.getAttributes().filter(_.getClass eq codeAttr).map { a => val byteCodeSize = codeAttrField.get(a).asInstanceOf[Array[Byte]].length CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.update(byteCodeSize) + + if (byteCodeSize > DEFAULT_JVM_HUGE_METHOD_LIMIT) { + logInfo("Generated method too long to be JIT compiled: " + + s"${cf.getThisClassName}.${method.getName} is $byteCodeSize bytes") + } + byteCodeSize } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 5b71becee2de..c383eec3d56b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -19,12 +19,16 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp +import org.apache.log4j.{Appender, AppenderSkeleton, Logger} +import org.apache.log4j.spi.LoggingEvent + import org.apache.spark.SparkFunSuite import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils} import org.apache.spark.sql.types._ @@ -499,4 +503,64 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { ctx.freshName("a_1") :: ctx.freshName("a_0") :: Nil assert(names2.distinct.length == 4) } + + test("SPARK-25113: should log when there exists generated methods above HugeMethodLimit") { + class MockAppender extends AppenderSkeleton { + var seenMessage = false + + override def append(loggingEvent: LoggingEvent): Unit = { + if (loggingEvent.getRenderedMessage().contains("Generated method too long")) { + seenMessage = true + } + } + + override def close(): Unit = {} + override def requiresLayout(): Boolean = false + } + + val appender = new MockAppender() + withLogAppender(appender) { + val x = 42 + val expr = HugeCodeIntExpression(x) + val proj = GenerateUnsafeProjection.generate(Seq(expr)) + val actual = proj(null) + assert(actual.getInt(0) == x) + } + assert(appender.seenMessage) + } + + private def withLogAppender(appender: Appender)(f: => Unit): Unit = { + val logger = + Logger.getLogger(classOf[CodeGenerator[_, _]].getName) + logger.addAppender(appender) + try f finally { + logger.removeAppender(appender) + } + } +} + +case class HugeCodeIntExpression(value: Int) extends Expression { + override def nullable: Boolean = true + override def dataType: DataType = IntegerType + override def children: Seq[Expression] = Nil + override def eval(input: InternalRow): Any = value + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + // Assuming HugeMethodLimit to be 8000 + val HugeMethodLimit = CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT + // A single "int dummyN = 0;" will be at least 2 bytes of bytecode: + // 0: iconst_0 + // 1: istore_1 + // and it'll become bigger as the number of local variables increases. + // So 4000 such dummy local variable definitions are sufficient to bump the bytecode size + // of a generated method to above 8000 bytes. + val hugeCode = (0 until (HugeMethodLimit / 2)).map(i => s"int dummy$i = 0;").mkString("\n") + val code = + code"""{ + | $hugeCode + |} + |boolean ${ev.isNull} = false; + |int ${ev.value} = $value; + """.stripMargin + ev.copy(code = code) + } }