-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-15647] [SQL] Fix Boundary Cases in OptimizeCodegen Rule #13392
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
40b70f3
aaf5f2e
f351c10
ecc4318
db67f8c
414e116
4306c4f
b2849e8
9830e31
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -937,7 +937,8 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { | |
| */ | ||
| case class OptimizeCodegen(conf: CatalystConf) extends Rule[LogicalPlan] { | ||
| def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { | ||
| case e @ CaseWhen(branches, _) if branches.size < conf.maxCaseBranchesForCodegen => | ||
| case e @ CaseWhen(branches, elseBranch) | ||
| if branches.size + elseBranch.size <= conf.maxCaseBranchesForCodegen => | ||
| e.toCodegen() | ||
|
||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,13 +17,36 @@ | |
|
|
||
| package org.apache.spark.sql.internal | ||
|
|
||
| import org.scalatest.BeforeAndAfterAll | ||
|
|
||
| import org.apache.spark.sql.{QueryTest, Row, SparkSession, SQLContext} | ||
| import org.apache.spark.sql.execution.WholeStageCodegenExec | ||
| import org.apache.spark.sql.test.{SharedSQLContext, TestSQLContext} | ||
|
|
||
| class SQLConfSuite extends QueryTest with SharedSQLContext { | ||
| class SQLConfSuite extends QueryTest with SharedSQLContext with BeforeAndAfterAll { | ||
|
||
| import testImplicits._ | ||
|
|
||
| private val testKey = "test.key.0" | ||
| private val testVal = "test.val.0" | ||
|
|
||
| override def beforeAll() { | ||
| super.beforeAll() | ||
| sql("DROP TABLE IF EXISTS testData") | ||
| spark | ||
| .range(10) | ||
| .select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) | ||
| .write | ||
| .saveAsTable("testData") | ||
|
||
| } | ||
|
|
||
| override def afterAll(): Unit = { | ||
| try { | ||
| sql("DROP TABLE IF EXISTS testData") | ||
| } finally { | ||
| super.afterAll() | ||
| } | ||
| } | ||
|
|
||
| test("propagate from spark conf") { | ||
| // We create a new context here to avoid order dependence with other tests that might call | ||
| // clear(). | ||
|
|
@@ -219,4 +242,32 @@ class SQLConfSuite extends QueryTest with SharedSQLContext { | |
| } | ||
| } | ||
|
|
||
| test("MAX_CASES_BRANCHES") { | ||
| val original = spark.conf.get(SQLConf.MAX_CASES_BRANCHES) | ||
| try { | ||
| val sql_one_branch_caseWhen = "SELECT CASE WHEN a = 1 THEN 1 END FROM testData" | ||
| val sql_two_branch_caseWhen = "SELECT CASE WHEN a = 1 THEN 1 ELSE 0 END FROM testData" | ||
|
|
||
| spark.conf.set(SQLConf.MAX_CASES_BRANCHES.key, "0") | ||
| assert(!sql(sql_one_branch_caseWhen) | ||
| .queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec]) | ||
| assert(!sql(sql_two_branch_caseWhen) | ||
| .queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec]) | ||
|
|
||
| spark.conf.set(SQLConf.MAX_CASES_BRANCHES.key, "1") | ||
| assert(sql(sql_one_branch_caseWhen) | ||
| .queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec]) | ||
| assert(!sql(sql_two_branch_caseWhen) | ||
| .queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec]) | ||
|
|
||
| spark.conf.set(SQLConf.MAX_CASES_BRANCHES.key, "2") | ||
| assert(sql(sql_one_branch_caseWhen) | ||
| .queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec]) | ||
| assert(sql(sql_two_branch_caseWhen) | ||
| .queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec]) | ||
| } finally { | ||
| spark.conf.set(SQLConf.MAX_CASES_BRANCHES.key, s"$original") | ||
| } | ||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reading the case takes a while, and I think it'd greatly benefit from introducing a local
`def` - a predicate - for the condition (I can't figure out a name for this, sorry).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not good at naming. How about `canCodegen`? :)