-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-24762][SQL] Enable Option of Product encoders #21732
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
e1b5dee
80506f4
ed3d5cb
9fc3f61
5f95bd0
a4f0405
c1f798f
80e11d2
0f029b0
0ffbf18
16af64c
79d10c1
fec1cac
e8737d4
3956cdd
8304de8
3a8a047
2d2057b
91d2b8b
dbd8678
62fdb17
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -149,6 +149,7 @@ object VeryComplexResultAgg extends Aggregator[Row, String, ComplexAggData] { | |
|
|
||
|
|
||
| case class OptionBooleanData(name: String, isGood: Option[Boolean]) | ||
| case class OptionBooleanIntData(name: String, isGood: Option[(Boolean, Int)]) | ||
|
|
||
| case class OptionBooleanAggregator(colName: String) | ||
| extends Aggregator[Row, Option[Boolean], Option[Boolean]] { | ||
|
|
@@ -183,6 +184,43 @@ case class OptionBooleanAggregator(colName: String) | |
| def OptionalBoolEncoder: Encoder[Option[Boolean]] = ExpressionEncoder() | ||
| } | ||
|
|
||
| case class OptionBooleanIntAggregator(colName: String) | ||
| extends Aggregator[Row, Option[(Boolean, Int)], Option[(Boolean, Int)]] { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's the expected schema after we apply an aggregator with
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For a non top-level encoder, the output schema of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. assuming non top level,
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. For non top level, |
||
|
|
||
| override def zero: Option[(Boolean, Int)] = None | ||
|
|
||
| override def reduce(buffer: Option[(Boolean, Int)], row: Row): Option[(Boolean, Int)] = { | ||
| val index = row.fieldIndex(colName) | ||
| val value = if (row.isNullAt(index)) { | ||
| Option.empty[(Boolean, Int)] | ||
| } else { | ||
| val nestedRow = row.getStruct(index) | ||
| Some((nestedRow.getBoolean(0), nestedRow.getInt(1))) | ||
| } | ||
| merge(buffer, value) | ||
| } | ||
|
|
||
| override def merge( | ||
| b1: Option[(Boolean, Int)], | ||
| b2: Option[(Boolean, Int)]): Option[(Boolean, Int)] = { | ||
| if ((b1.isDefined && b1.get._1) || (b2.isDefined && b2.get._1)) { | ||
| val newInt = b1.map(_._2).getOrElse(0) + b2.map(_._2).getOrElse(0) | ||
| Some((true, newInt)) | ||
| } else if (b1.isDefined) { | ||
| b1 | ||
| } else { | ||
| b2 | ||
| } | ||
| } | ||
|
|
||
| override def finish(reduction: Option[(Boolean, Int)]): Option[(Boolean, Int)] = reduction | ||
|
|
||
| override def bufferEncoder: Encoder[Option[(Boolean, Int)]] = OptionalBoolIntEncoder | ||
| override def outputEncoder: Encoder[Option[(Boolean, Int)]] = OptionalBoolIntEncoder | ||
|
|
||
| def OptionalBoolIntEncoder: Encoder[Option[(Boolean, Int)]] = ExpressionEncoder(topLevel = false) | ||
| } | ||
|
|
||
| class DatasetAggregatorSuite extends QueryTest with SharedSQLContext { | ||
| import testImplicits._ | ||
|
|
||
|
|
@@ -393,4 +431,17 @@ class DatasetAggregatorSuite extends QueryTest with SharedSQLContext { | |
| assert(grouped.schema == df.schema) | ||
| checkDataset(grouped.as[OptionBooleanData], OptionBooleanData("bob", Some(true))) | ||
| } | ||
|
|
||
| test("SPARK-24762: Aggregator should be able to use Option of Product encoder") { | ||
| val df = Seq( | ||
| OptionBooleanIntData("bob", Some((true, 1))), | ||
| OptionBooleanIntData("bob", Some((false, 2))), | ||
| OptionBooleanIntData("bob", None)).toDF() | ||
| val group = df | ||
| .groupBy("name") | ||
| .agg(OptionBooleanIntAggregator("isGood").toColumn.alias("isGood")) | ||
| assert(df.schema == group.schema) | ||
|
||
| checkAnswer(group, Row("bob", Row(true, 3)) :: Nil) | ||
| checkDataset(group.as[OptionBooleanIntData], OptionBooleanIntData("bob", Some((true, 3)))) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
where do we call this apply with
topLevel = false?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In
Aggregator, we can call this apply withtopLevel = falseto avoid resulting a nested struct.