Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -610,8 +610,24 @@ class CodegenContext {
splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil)
}

private def splitExpressions(
expressions: Seq[String], funcName: String, arguments: Seq[(String, String)]): String = {
/**
* Splits the generated code of expressions into multiple functions, because function has
* 64kb code size limit in JVM
*
* @param expressions the codes to evaluate expressions.
* @param funcName the split function name base.
* @param arguments the list of (type, name) of the arguments of the split function.
* @param returnType the return type of the split function.
* @param makeSplitFunction makes split function body, e.g. add preparation or cleanup.
* @param foldFunctions folds the split function calls.
*/
def splitExpressions(
expressions: Seq[String],
funcName: String,
arguments: Seq[(String, String)],
returnType: String = "void",
makeSplitFunction: String => String = identity,
foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
val blocks = new ArrayBuffer[String]()
val blockBuilder = new StringBuilder()
for (code <- expressions) {
Expand All @@ -632,18 +648,19 @@ class CodegenContext {
blocks.head
} else {
val func = freshName(funcName)
val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ")
val functions = blocks.zipWithIndex.map { case (body, i) =>
val name = s"${func}_$i"
val code = s"""
|private void $name(${arguments.map { case (t, name) => s"$t $name" }.mkString(", ")}) {
| $body
|private $returnType $name($argString) {
| ${makeSplitFunction(body)}
|}
""".stripMargin
addNewFunction(name, code)
name
}

functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")});").mkString("\n")
foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
}
}
"""
}.mkString("\n")
comparisons
}

ctx.splitExpressions(
expressions = comparisons,
funcName = "compare",
arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")),
returnType = "int",
makeSplitFunction = { body =>
s"""
InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated.
$body
return 0;
"""
},
foldFunctions = { funCalls =>
val comp = ctx.freshName("comp")
funCalls.zipWithIndex.map { case (funCall, i) =>
s"""
int ${comp}_$i = $funCall;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: ctx.freshName already adds postfix to the name, you don't need to add _$i again.

Copy link
Contributor Author

@lw-lin lw-lin Jan 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's comp_0, comp_1 in the following:

/* 15388 */   public int compare(InternalRow a, InternalRow b) {
/* 15389 */
/* 15390 */     int comp_0 = compare_0(a, b);
/* 15391 */     if (comp_0 != 0) {
/* 15392 */       return comp_0;
/* 15393 */     }
/* 15394 */
/* 15395 */     int comp_1 = compare_1(a, b);
/* 15396 */     if (comp_1 != 0) {
/* 15397 */       return comp_1;
/* 15398 */     }
/* 1.... */
/* 1.... */     ...
/* 1.... */
/* 15450 */     return 0;
/* 15451 */   }

so maybe let's keep this _$i?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you double check? the implementation of freshName is

if (freshNameIds.contains(fullName)) {
      val id = freshNameIds(fullName)
      freshNameIds(fullName) = id + 1
      s"$fullName$id"
    } else {
      freshNameIds += fullName -> 1
      fullName
    }

it already adds an id postfix.

Copy link
Contributor Author

@lw-lin lw-lin Jan 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I mis-understood it. You meant moving ctx.freshName("comp") into the funCalls.zipWithIndex.map {...}, right? like the following:

// val comp = ctx.freshName("comp")    // this is moved into the map {...}
funCalls.zipWithIndex.map { case (funCall, i) =>
  s"""
    int ${ctx.freshName("comp")} = $funCall;
 ...
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean

val comp = ctx.freshName("comp")
funCalls.zipWithIndex.map { case (funCall, i) =>
  s"""
    int $comp = $funCall;
 ...
}

just remove all the _$i here

Copy link
Contributor Author

@lw-lin lw-lin Jan 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for clarifying on this. but we'll get something like (suppose we got a fresh name comp_1):

/* 15388 */   public int compare(InternalRow a, InternalRow b) {
/* 15389 */
/* 15390 */     int comp_1 = compare_0(a, b);     // comp_1 
/* 15391 */     if (comp_1 != 0) {
/* 15392 */       return comp_1;
/* 15393 */     }
/* 15394 */
/* 15395 */     int comp_1 = compare_1(a, b);     // still comp_1 
/* 15396 */     if (comp_1 != 0) {
/* 15397 */       return comp_1;
/* 15398 */     }
/* 1.... */
/* 1.... */     ...
/* 1.... */
/* 15450 */     return 0;
/* 15451 */   }

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh sorry, we should put the freshName in the map function

funCalls.zipWithIndex.map { case (funCall, i) =>
  val comp = ctx.freshName("comp")
  s"""
    int $comp = $funCall;
 ...
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah i see - let me update this :-)

if (${comp}_$i != 0) {
return ${comp}_$i;
}
"""
}.mkString
})
}

protected def create(ordering: Seq[SortOrder]): BaseOrdering = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,17 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}
}

test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") {
val sortOrder = Literal("abc").asc

// this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845
GenerateOrdering.generate(Array.fill(40)(sortOrder))

// this is FAILING prior to SPARK-16845, but it should be passing after SPARK-16845
GenerateOrdering.generate(Array.fill(450)(sortOrder))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unnecessary, it's covered by the 5000 test case.

Copy link
Contributor Author

@lw-lin lw-lin Jan 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, let's remove the 450 test case


// verify that we can support up to 5000 ordering comparisons, which should be sufficient
GenerateOrdering.generate(Array.fill(5000)(sortOrder))
}
}