-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-24121][SQL] Add API for handling expression code generation #21193
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
1df9943
5fe425c
00bef6b
5d9c454
162deb2
d138ee0
ee9a4c0
e7cfa28
5945c15
2b30654
aff411b
53b329a
72faac3
ffbf4ab
d040676
c378ce2
2ca9741
d91f111
4b49e8a
96c594a
00cc564
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions.codegen | |
|
|
||
| import java.lang.{Boolean => JBool} | ||
|
|
||
| import scala.collection.mutable.ArrayBuffer | ||
| import scala.language.{existentials, implicitConversions} | ||
|
|
||
| import org.apache.spark.sql.types.{BooleanType, DataType} | ||
|
|
@@ -130,6 +131,8 @@ trait Block extends JavaCode { | |
|
|
||
| def length: Int = toString.length | ||
|
|
||
| def nonEmpty: Boolean = toString.nonEmpty | ||
|
|
||
| // The leading prefix that should be stripped from each line. | ||
| // By default we strip blanks or control characters followed by '|' from the line. | ||
| var _marginChar: Option[Char] = Some('|') | ||
|
|
@@ -167,9 +170,40 @@ object Block { | |
| case other => throw new IllegalArgumentException( | ||
| s"Can not interpolate ${other.getClass.getName} into code block.") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Runtime exception?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or, how about accepting other values as strings? e.g.,
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel it's more like an illegal argument to the string interpolator for now. I'm open to other ideas on this.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd like to limit the types of objects we can interpolate at first, so there will be fewer cases I'm not aware of. We can open it up to other types later.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +100000 |
||
| } | ||
| CodeBlock(sc.parts, args) | ||
|
|
||
| val (codeParts, blockInputs) = foldLiteralArgs(sc.parts, args) | ||
| CodeBlock(codeParts, blockInputs) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Folds eagerly the literal args into the code parts. | ||
| private def foldLiteralArgs(parts: Seq[String], args: Seq[Any]): (Seq[String], Seq[Any]) = { | ||
| val codeParts = ArrayBuffer.empty[String] | ||
| val blockInputs = ArrayBuffer.empty[Any] | ||
|
||
|
|
||
| val strings = parts.iterator | ||
| val inputs = args.iterator | ||
| val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) | ||
|
|
||
| buf append strings.next | ||
|
||
| while (strings.hasNext) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks good |
||
| val input = inputs.next | ||
| input match { | ||
| case _: ExprValue | _: Block => | ||
| codeParts += buf.toString | ||
| buf.clear | ||
| blockInputs += input | ||
| case _ => | ||
| buf append input | ||
| } | ||
| buf append strings.next | ||
| } | ||
| if (buf.nonEmpty) { | ||
| codeParts += buf.toString | ||
| } | ||
|
|
||
| (codeParts.toSeq, blockInputs.toSeq) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -182,11 +216,10 @@ case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Bloc | |
| blockInputs.flatMap { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think |
||
| case b: Block => b.exprValues | ||
| case e: ExprValue => Set(e) | ||
| case _ => Set.empty[ExprValue] | ||
| }.toSet | ||
| } | ||
|
|
||
| override def code: String = { | ||
| override lazy val code: String = { | ||
| val strings = codeParts.iterator | ||
| val inputs = blockInputs.iterator | ||
| val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) | ||
|
|
@@ -207,7 +240,7 @@ case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Bloc | |
|
|
||
| case class Blocks(blocks: Seq[Block]) extends Block { | ||
| override lazy val exprValues: Set[ExprValue] = blocks.flatMap(_.exprValues).toSet | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's required; otherwise there is a type-mismatch compile error.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, sorry, |
||
| override def code: String = blocks.map(_.toString).mkString("\n") | ||
| override lazy val code: String = blocks.map(_.toString).mkString("\n") | ||
|
|
||
| override def + (other: Block): Block = other match { | ||
| case c: CodeBlock => Blocks(blocks :+ c) | ||
|
|
@@ -217,7 +250,7 @@ case class Blocks(blocks: Seq[Block]) extends Block { | |
| } | ||
|
|
||
| object EmptyBlock extends Block with Serializable { | ||
| override def code: String = "" | ||
| override val code: String = "" | ||
| override val exprValues: Set[ExprValue] = Set.empty | ||
|
|
||
| override def + (other: Block): Block = other | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,12 +23,20 @@ import org.apache.spark.sql.types.{BooleanType, IntegerType} | |
|
|
||
| class CodeBlockSuite extends SparkFunSuite { | ||
|
|
||
| test("Block can interpolate string and ExprValue inputs") { | ||
| test("Block interpolates string and ExprValue inputs") { | ||
| val isNull = JavaCode.isNullVariable("expr1_isNull") | ||
| val code = code"boolean ${isNull} = ${JavaCode.defaultLiteral(BooleanType)};" | ||
| val stringLiteral = "false" | ||
| val code = code"boolean $isNull = $stringLiteral;" | ||
| assert(code.toString == "boolean expr1_isNull = false;") | ||
| } | ||
|
|
||
| test("Literals are folded into string code parts instead of block inputs") { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great, I like it! |
||
| val value = JavaCode.variable("expr1", IntegerType) | ||
| val intLiteral = 1 | ||
| val code = code"int $value = $intLiteral;" | ||
| assert(code.asInstanceOf[CodeBlock].blockInputs === Seq(value)) | ||
| } | ||
|
|
||
| test("Block.stripMargin") { | ||
| val isNull = JavaCode.isNullVariable("expr1_isNull") | ||
| val value = JavaCode.variable("expr1", IntegerType) | ||
|
|
@@ -92,26 +100,26 @@ class CodeBlockSuite extends SparkFunSuite { | |
| } | ||
|
|
||
| test("Throws exception when interpolating unexcepted object in code block") { | ||
| val obj = TestClass(100) | ||
| val obj = Tuple2(1, 1) | ||
| val e = intercept[IllegalArgumentException] { | ||
| code"$obj" | ||
| } | ||
| assert(e.getMessage().contains(s"Can not interpolate ${obj.getClass.getName}")) | ||
| } | ||
|
|
||
| test("replace expr values in code block") { | ||
| val statement = JavaCode.expression("1 + 1", IntegerType) | ||
| val expr = JavaCode.expression("1 + 1", IntegerType) | ||
| val isNull = JavaCode.isNullVariable("expr1_isNull") | ||
| val exprInFunc = JavaCode.variable("expr1", IntegerType) | ||
|
|
||
| val code = | ||
| code""" | ||
| |callFunc(int $statement) { | ||
| |callFunc(int $expr) { | ||
| | boolean $isNull = false; | ||
| | int $exprInFunc = $statement + 1; | ||
| | int $exprInFunc = $expr + 1; | ||
| |}""".stripMargin | ||
|
|
||
| val aliasedParam = JavaCode.variable("aliased", statement.javaType) | ||
| val aliasedParam = JavaCode.variable("aliased", expr.javaType) | ||
| val aliasedInputs = code.asInstanceOf[CodeBlock].blockInputs.map { | ||
| case _: SimpleExprValue => aliasedParam | ||
| case other => other | ||
|
|
@@ -126,5 +134,3 @@ class CodeBlockSuite extends SparkFunSuite { | |
| assert(aliasedCode.toString == expected.toString) | ||
| } | ||
| } | ||
|
|
||
| private case class TestClass(a: Int) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When do we need this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like:
It's basically for compatibility. We can remove this and disallow
stripMargin(customPrefix). WDYT?