-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-16845][SQL] GeneratedClass$SpecificOrdering grows beyond 64 KB
#15480
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
ecc6720
1ae9935
33b5fd8
0aedc47
3d31cb3
4aef473
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -72,7 +72,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR | |
| * Generates the code for ordering based on the given order. | ||
| */ | ||
| def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = { | ||
| val comparisons = ordering.map { order => | ||
| def comparisons(orderingGroup: Seq[SortOrder]) = orderingGroup.map { order => | ||
| val eval = order.child.genCode(ctx) | ||
| val asc = order.isAscending | ||
| val isNullA = ctx.freshName("isNullA") | ||
|
|
@@ -118,7 +118,45 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR | |
| } | ||
| """ | ||
| }.mkString("\n") | ||
| comparisons | ||
|
|
||
| /* | ||
|
||
| * 40 = 7000 bytes / 170 (around 170 bytes per ordering comparison). | ||
|
||
| * The maximum byte code size to be compiled for HotSpot is 8000 bytes. | ||
| * We should keep less than 8000 bytes. | ||
| */ | ||
| val numberOfComparisonsThreshold = 40 | ||
|
|
||
| if (ordering.size <= numberOfComparisonsThreshold) { | ||
| s""" | ||
| | InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. | ||
| | ${comparisons(ordering)} | ||
| """.stripMargin | ||
| } else { | ||
| val groupedOrderingItr = ordering.grouped(numberOfComparisonsThreshold) | ||
| var groupedOrderingLength = 0 | ||
| groupedOrderingItr.zipWithIndex.foreach { case (orderingGroup, i) => | ||
| groupedOrderingLength += 1 | ||
| val funcName = s"compare_$i" | ||
|
||
| val funcCode = | ||
| s""" | ||
| |private int $funcName(InternalRow a, InternalRow b) { | ||
| | InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. | ||
| | ${comparisons(orderingGroup)} | ||
| | return 0; | ||
|
||
| |} | ||
| """.stripMargin | ||
| ctx.addNewFunction(funcName, funcCode) | ||
| } | ||
|
|
||
| (0 to groupedOrderingLength - 1).map { i => | ||
|
||
| s""" | ||
| |int comp_$i = compare_$i(a, b); | ||
| |if (comp_$i != 0) { | ||
| | return comp_$i; | ||
| |} | ||
| """.stripMargin | ||
| }.mkString | ||
| } | ||
| } | ||
|
|
||
| protected def create(ordering: Seq[SortOrder]): BaseOrdering = { | ||
|
|
@@ -142,7 +180,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR | |
| ${ctx.declareAddedFunctions()} | ||
|
|
||
| public int compare(InternalRow a, InternalRow b) { | ||
| InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. | ||
| $comparisons | ||
| return 0; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -127,4 +127,17 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { | |
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") { | ||
| val sortOrder = Literal("abc").asc | ||
|
|
||
| // this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845 | ||
| GenerateOrdering.generate(Array.fill(40)(sortOrder)) | ||
|
|
||
| // this is FAILING prior to SPARK-16845, but it should be passing after SPARK-16845 | ||
| GenerateOrdering.generate(Array.fill(450)(sortOrder)) | ||
|
||
|
|
||
| // verify that we can support up to 10000 ordering comparisons, which should be sufficient | ||
| GenerateOrdering.generate(Array.fill(10000)(sortOrder)) | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this line should not be moved into
GenerateOrdering.genComparisons()if there is not a special reason.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thanks -- but not removing it from here would lead to declaring a local variable
ithat we might not use when(ordering.size > numberOfComparisonsThreshold). what do you think?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think
GenerateOrdering.genComparisons()is expected to generate a PART of comparison method. If the declaration of the variableiis in the part, we can't useGenerateOrdering.genComparisons()twice or more for the same method.Declaration of unused variable will not be a problem.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh I see, makes a lot of sense. Let me fix this, thanks!