-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-23711][SQL] Add fallback generator for UnsafeProjection #21106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
fe2a1cd
36f90cf
68213fb
6cdaec4
32e2766
aeef468
1f5cc17
4c04d14
67f8701
e01fe97
de88f88
f883c2b
716d88f
8c56ae4
5a735af
5569646
ed525f6
c9ec817
5527595
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.catalyst.expressions | ||
|
|
||
| import org.codehaus.commons.compiler.CompileException | ||
| import org.codehaus.janino.InternalCompilerException | ||
|
|
||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.util.Utils | ||
|
|
||
| /** | ||
| * Catches compile error during code generation. | ||
| */ | ||
| object CodegenError { | ||
| def unapply(throwable: Throwable): Option[Exception] = throwable match { | ||
| case e: InternalCompilerException => Some(e) | ||
| case e: CompileException => Some(e) | ||
| case _ => None | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * A factory which can be used to create objects that have both codegen and interpreted | ||
| * implementations. This tries to create codegen object first, if any compile error happens, | ||
| * it fallbacks to interpreted version. | ||
| */ | ||
| abstract class CodegenObjectFactory[IN, OUT] { | ||
|
|
||
| // Creates wanted object. First trying codegen implementation. If any compile error happens, | ||
| // fallbacks to interpreted version. | ||
| def createObject(in: IN): OUT = { | ||
| val fallbackMode = SQLConf.get.getConf(SQLConf.CODEGEN_OBJECT_FALLBACK) | ||
|
||
| // Only in tests, we can use `SQLConf.CODEGEN_OBJECT_FALLBACK` to choose codegen/interpreted | ||
| // only path. | ||
| if (Utils.isTesting && fallbackMode != "fallback") { | ||
| fallbackMode match { | ||
| case "codegen-only" => createCodeGeneratedObject(in) | ||
| case "interpreted-only" => createInterpretedObject(in) | ||
| } | ||
| } else { | ||
| try { | ||
| createCodeGeneratedObject(in) | ||
| } catch { | ||
| case CodegenError(_) => createInterpretedObject(in) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| protected def createCodeGeneratedObject(in: IN): OUT | ||
| protected def createInterpretedObject(in: IN): OUT | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,12 +87,11 @@ class InterpretedUnsafeProjection(expressions: Array[Expression]) extends Unsafe | |
| /** | ||
| * Helper functions for creating an [[InterpretedUnsafeProjection]]. | ||
| */ | ||
| object InterpretedUnsafeProjection extends UnsafeProjectionCreator { | ||
|
|
||
| object InterpretedUnsafeProjection { | ||
| /** | ||
| * Returns an [[UnsafeProjection]] for given sequence of bound Expressions. | ||
| */ | ||
| override protected def createProjection(exprs: Seq[Expression]): UnsafeProjection = { | ||
| protected[sql] def createProjection(exprs: Seq[Expression]): UnsafeProjection = { | ||
|
||
| // We need to make sure that we do not reuse stateful expressions. | ||
| val cleanedExpressions = exprs.map(_.transform { | ||
| case s: Stateful => s.freshCopy() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -108,7 +108,31 @@ abstract class UnsafeProjection extends Projection { | |
| override def apply(row: InternalRow): UnsafeRow | ||
| } | ||
|
|
||
| trait UnsafeProjectionCreator { | ||
| /** | ||
| * The factory object for `UnsafeProjection`. | ||
| */ | ||
| object UnsafeProjection extends CodegenObjectFactory[Seq[Expression], UnsafeProjection] { | ||
|
||
|
|
||
| override protected def createCodeGeneratedObject(in: Seq[Expression]): UnsafeProjection = { | ||
| GenerateUnsafeProjection.generate(in) | ||
| } | ||
|
|
||
| override protected def createInterpretedObject(in: Seq[Expression]): UnsafeProjection = { | ||
| InterpretedUnsafeProjection.createProjection(in) | ||
| } | ||
|
|
||
| protected def toBoundExprs( | ||
| exprs: Seq[Expression], | ||
| inputSchema: Seq[Attribute]): Seq[Expression] = { | ||
| exprs.map(BindReferences.bindReference(_, inputSchema)) | ||
| } | ||
|
|
||
| protected def toUnsafeExprs(exprs: Seq[Expression]): Seq[Expression] = { | ||
| exprs.map(_ transform { | ||
| case CreateNamedStruct(children) => CreateNamedStructUnsafe(children) | ||
| }) | ||
| } | ||
|
|
||
| /** | ||
| * Returns an UnsafeProjection for given StructType. | ||
| * | ||
|
|
@@ -129,10 +153,7 @@ trait UnsafeProjectionCreator { | |
| * Returns an UnsafeProjection for given sequence of bound Expressions. | ||
| */ | ||
| def create(exprs: Seq[Expression]): UnsafeProjection = { | ||
| val unsafeExprs = exprs.map(_ transform { | ||
| case CreateNamedStruct(children) => CreateNamedStructUnsafe(children) | ||
| }) | ||
| createProjection(unsafeExprs) | ||
| createObject(toUnsafeExprs(exprs)) | ||
| } | ||
|
|
||
| def create(expr: Expression): UnsafeProjection = create(Seq(expr)) | ||
|
|
@@ -142,34 +163,24 @@ trait UnsafeProjectionCreator { | |
| * `inputSchema`. | ||
| */ | ||
| def create(exprs: Seq[Expression], inputSchema: Seq[Attribute]): UnsafeProjection = { | ||
| create(exprs.map(BindReferences.bindReference(_, inputSchema))) | ||
| } | ||
|
|
||
| /** | ||
| * Returns an [[UnsafeProjection]] for given sequence of bound Expressions. | ||
| */ | ||
| protected def createProjection(exprs: Seq[Expression]): UnsafeProjection | ||
| } | ||
|
|
||
| object UnsafeProjection extends UnsafeProjectionCreator { | ||
|
|
||
| override protected def createProjection(exprs: Seq[Expression]): UnsafeProjection = { | ||
| GenerateUnsafeProjection.generate(exprs) | ||
| create(toBoundExprs(exprs, inputSchema)) | ||
| } | ||
|
|
||
| /** | ||
| * Same as other create()'s but allowing enabling/disabling subexpression elimination. | ||
| * TODO: refactor the plumbing and clean this up. | ||
| * The param `subexpressionEliminationEnabled` doesn't guarantee to work. For example, | ||
| * when fallbacking to interpreted execution, it is not supported. | ||
| */ | ||
| def create( | ||
| exprs: Seq[Expression], | ||
| inputSchema: Seq[Attribute], | ||
| subexpressionEliminationEnabled: Boolean): UnsafeProjection = { | ||
| val e = exprs.map(BindReferences.bindReference(_, inputSchema)) | ||
| .map(_ transform { | ||
| case CreateNamedStruct(children) => CreateNamedStructUnsafe(children) | ||
| }) | ||
| GenerateUnsafeProjection.generate(e, subexpressionEliminationEnabled) | ||
| val unsafeExprs = toUnsafeExprs(toBoundExprs(exprs, inputSchema)) | ||
| try { | ||
| GenerateUnsafeProjection.generate(unsafeExprs, subexpressionEliminationEnabled) | ||
| } catch { | ||
| case CodegenError(_) => InterpretedUnsafeProjection.createProjection(unsafeExprs) | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -686,6 +686,17 @@ object SQLConf { | |
| .intConf | ||
| .createWithDefault(100) | ||
|
|
||
| val CODEGEN_OBJECT_FALLBACK = buildConf("spark.sql.test.codegenObject.fallback") | ||
|
||
| .doc("Determines the behavior of any factories extending `CodegenObjectFactory`" + | ||
| " during tests. `fallback` means trying codegen first and then fallbacking to" + | ||
| "interpreted if any compile error happens. Disabling fallback if `codegen-only`." + | ||
| "`interpreted-only` skips codegen and goes interpreted path always. Note that" + | ||
| "this config works only for tests. In production it always runs with `fallback` mode") | ||
| .internal() | ||
| .stringConf | ||
| .checkValues(Set("fallback", "codegen-only", "interpreted-only")) | ||
| .createWithDefault("fallback") | ||
|
|
||
| val CODEGEN_FALLBACK = buildConf("spark.sql.codegen.fallback") | ||
| .internal() | ||
| .doc("When true, (whole stage) codegen could be temporary disabled for the part of query that" + | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.catalyst.expressions | ||
|
|
||
| import org.apache.spark.SparkFunSuite | ||
| import org.apache.spark.sql.catalyst.plans.PlanTestBase | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types.{IntegerType, LongType} | ||
|
|
||
| class CodegenObjectFactorySuite extends SparkFunSuite with PlanTestBase { | ||
|
|
||
| // Given a factory object and corresponding input, checking if `SQLConf.CODEGEN_OBJECT_FALLBACK` | ||
| // can switch between codegen/interpreted implementation. | ||
| private def testCodegenFactory[IN, OUT](factory: CodegenObjectFactory[IN, OUT], | ||
| input: IN, checkerForCodegen: OUT => Unit, checkerForInterpreted: OUT => Unit) = { | ||
|
|
||
| val modes = Seq("codegen-only", "interpreted-only") | ||
| .zip(Seq(checkerForCodegen, checkerForInterpreted)) | ||
|
|
||
| for ((fallbackMode, checker) <- modes) { | ||
| withSQLConf(SQLConf.CODEGEN_OBJECT_FALLBACK.key -> fallbackMode) { | ||
| val obj = factory.createObject(input) | ||
| checker(obj) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("test UnsafeProjection factory") { | ||
| val input = Seq(LongType, IntegerType) | ||
| .zipWithIndex.map(x => BoundReference(x._2, x._1, true)) | ||
|
|
||
| def checkerForCodegen(projection: UnsafeProjection): Unit = { | ||
| assert(projection.getClass.getName.contains("GeneratedClass$SpecificUnsafeProjection")) | ||
| } | ||
|
|
||
| def checkerForInterpreted(projection: UnsafeProjection): Unit = { | ||
| assert(projection.isInstanceOf[InterpretedUnsafeProjection]) | ||
| } | ||
|
|
||
| testCodegenFactory(UnsafeProjection, input, checkerForCodegen, checkerForInterpreted) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} | |
| import org.apache.spark.sql.catalyst.analysis.{ResolveTimeZone, SimpleAnalyzer} | ||
| import org.apache.spark.sql.catalyst.expressions.codegen._ | ||
| import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer | ||
| import org.apache.spark.sql.catalyst.plans.PlanTestBase | ||
| import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project} | ||
| import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} | ||
| import org.apache.spark.sql.internal.SQLConf | ||
|
|
@@ -40,7 +41,7 @@ import org.apache.spark.util.Utils | |
| /** | ||
| * A few helper functions for expression evaluation testing. Mixin this trait to use them. | ||
| */ | ||
| trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { | ||
| trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBase { | ||
| self: SparkFunSuite => | ||
|
|
||
| protected def create_row(values: Any*): InternalRow = { | ||
|
|
@@ -196,39 +197,35 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { | |
| expression: Expression, | ||
| expected: Any, | ||
| inputRow: InternalRow = EmptyRow): Unit = { | ||
| checkEvaluationWithUnsafeProjection(expression, expected, inputRow, UnsafeProjection) | ||
| checkEvaluationWithUnsafeProjection(expression, expected, inputRow, InterpretedUnsafeProjection) | ||
| } | ||
|
|
||
| protected def checkEvaluationWithUnsafeProjection( | ||
| expression: Expression, | ||
| expected: Any, | ||
| inputRow: InternalRow, | ||
| factory: UnsafeProjectionCreator): Unit = { | ||
| val unsafeRow = evaluateWithUnsafeProjection(expression, inputRow, factory) | ||
| val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" | ||
|
|
||
| if (expected == null) { | ||
| if (!unsafeRow.isNullAt(0)) { | ||
| val expectedRow = InternalRow(expected, expected) | ||
| fail("Incorrect evaluation in unsafe mode: " + | ||
| s"$expression, actual: $unsafeRow, expected: $expectedRow$input") | ||
| } | ||
| } else { | ||
| val lit = InternalRow(expected, expected) | ||
| val expectedRow = | ||
| factory.create(Array(expression.dataType, expression.dataType)).apply(lit) | ||
| if (unsafeRow != expectedRow) { | ||
| fail("Incorrect evaluation in unsafe mode: " + | ||
| s"$expression, actual: $unsafeRow, expected: $expectedRow$input") | ||
| for (fallbackMode <- Seq("codegen-only", "interpreted-only")) { | ||
| withSQLConf(SQLConf.CODEGEN_OBJECT_FALLBACK.key -> fallbackMode) { | ||
| val factory = UnsafeProjection | ||
| val unsafeRow = evaluateWithUnsafeProjection(expression, inputRow, factory) | ||
| val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" | ||
|
|
||
| if (expected == null) { | ||
| if (!unsafeRow.isNullAt(0)) { | ||
| val expectedRow = InternalRow(expected, expected) | ||
| fail("Incorrect evaluation in unsafe mode: " + | ||
| s"$expression, actual: $unsafeRow, expected: $expectedRow$input") | ||
| } | ||
| } else { | ||
| val lit = InternalRow(expected, expected) | ||
| val expectedRow = | ||
| factory.create(Array(expression.dataType, expression.dataType)).apply(lit) | ||
| if (unsafeRow != expectedRow) { | ||
| fail("Incorrect evaluation in unsafe mode: " + | ||
| s"$expression, actual: $unsafeRow, expected: $expectedRow$input") | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| protected def evaluateWithUnsafeProjection( | ||
| expression: Expression, | ||
| inputRow: InternalRow = EmptyRow, | ||
| factory: UnsafeProjectionCreator = UnsafeProjection): InternalRow = { | ||
| factory: UnsafeProjection.type = UnsafeProjection): InternalRow = { | ||
|
||
| // SPARK-16489 Explicitly doing code generation twice so code gen will fail if | ||
| // some expression is reusing variable names across different instances. | ||
| // This behavior is tested in ExpressionEvalHelperSuite. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any better name? How about
CodeGeneratorWithInterpretedFallbackand make it extendsCodeGenerator?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why it needs
CodeGenerator? I think thisCodeGeneratorWithInterpretedFallbackjust delegate to variousCodeGenerator(e.g.,GenerateUnsafeProjection) to produce codegen object.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GenerateUnsafeProjectionalso extendsCodeGenerator.Is there any class we want it to extend
CodegenObjectFactorybut notCodeGenerator?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, no doubt
GenerateUnsafeProjectionextendsCodeGenerator. It's the class doing generators of byte codes.Previously
UnsafeProjectionhas its own interfaceUnsafeProjectionCreator, does not extendsCodeGenerator. So currently I let it follow previousUnsafeProjection's API.We can change it to implement
CodeGeneratorand also change the places usingUnsafeProjection, if you think it is necessary.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok let's keep the previous way to minimize code changes. How about just rename it to
CodeGeneratorWithInterpretedFallback?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. Reamed.