-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-47007][SQL] Add the MapSort expression
#45639
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 26 commits
a081649
1441549
1be06e3
249e903
aaae883
acaf95e
5619fdb
7754c14
f0ebf5d
1f78167
a5eb480
9497f99
5e7a033
ab70f1e
e79d65c
a435355
c9901d0
da6a710
81008c2
86b29c5
c08ab6c
31a797c
69e3b48
51ab204
8d9ac51
2951bcc
0fc3c6a
0c7d21a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -888,6 +888,157 @@ case class MapFromEntries(child: Expression) | |
| copy(child = newChild) | ||
| } | ||
|
|
||
/**
 * Sorts the entries of a map by key and returns a new map holding the same key/value
 * pairs in sorted key order.
 *
 * @param base           expression producing the map to sort; its key type must be orderable
 *                       (see [[checkInputDataTypes]]).
 * @param ascendingOrder boolean literal selecting ascending (`true`) or descending (`false`)
 *                       key order. Anything other than a boolean literal is rejected by
 *                       [[checkInputDataTypes]].
 */
case class MapSort(base: Expression, ascendingOrder: Expression)
  extends BinaryExpression with NullIntolerant with QueryErrorsBase {

  /** Convenience constructor: sort in ascending key order. */
  def this(e: Expression) = this(e, Literal(true))

  // These must be lazy. As eager `val`s the `asInstanceOf[MapType]` cast ran inside the
  // constructor, so building a MapSort over a non-map input threw a ClassCastException
  // before `checkInputDataTypes()` had a chance to report UNEXPECTED_INPUT_TYPE, making
  // its final `case _` branch unreachable. Deferring the cast means they are only
  // evaluated by eval/codegen, i.e. after the type check has accepted `base` as a map.
  @transient lazy val keyType: DataType = base.dataType.asInstanceOf[MapType].keyType
  @transient lazy val valueType: DataType = base.dataType.asInstanceOf[MapType].valueType

  override def left: Expression = base
  override def right: Expression = ascendingOrder
  override def dataType: DataType = base.dataType

  /**
   * Accepts the inputs only when `base` is a map whose key type is orderable and
   * `ascendingOrder` is a boolean literal.
   *
   * @return `TypeCheckSuccess`, or a `DataTypeMismatch` with sub-class
   *         `UNEXPECTED_INPUT_TYPE` (wrong type for either argument) or
   *         `INVALID_ORDERING_TYPE` (map key type is not orderable).
   */
  override def checkInputDataTypes(): TypeCheckResult = base.dataType match {
    case m: MapType if RowOrdering.isOrderable(m.keyType) =>
      ascendingOrder match {
        case Literal(_: Boolean, BooleanType) =>
          TypeCheckResult.TypeCheckSuccess
        case _ =>
          // Second argument is not a boolean literal.
          DataTypeMismatch(
            errorSubClass = "UNEXPECTED_INPUT_TYPE",
            messageParameters = Map(
              "paramIndex" -> ordinalNumber(1),
              "requiredType" -> toSQLType(BooleanType),
              "inputSql" -> toSQLExpr(ascendingOrder),
              "inputType" -> toSQLType(ascendingOrder.dataType))
          )
      }
    case _: MapType =>
      // A map, but its keys cannot be ordered.
      DataTypeMismatch(
        errorSubClass = "INVALID_ORDERING_TYPE",
        messageParameters = Map(
          "functionName" -> toSQLId(prettyName),
          "dataType" -> toSQLType(base.dataType)
        )
      )
    case _ =>
      // First argument is not a map at all.
      DataTypeMismatch(
        errorSubClass = "UNEXPECTED_INPUT_TYPE",
        messageParameters = Map(
          "paramIndex" -> ordinalNumber(0),
          "requiredType" -> toSQLType(MapType),
          "inputSql" -> toSQLExpr(base),
          "inputType" -> toSQLType(base.dataType))
      )
  }

  /**
   * Interpreted evaluation.
   *
   * @param array     the input map; despite the name this is cast to `MapData`.
   * @param ascending the evaluated `ascendingOrder` flag, cast to `Boolean`.
   * @return an `ArrayBasedMapData` whose key and value arrays follow the sorted key order.
   */
  override def nullSafeEval(array: Any, ascending: Any): Any = {
    // Pair every key with its value, sort the pairs by key, then split them back into
    // parallel key/value arrays to form the sorted map.
    val mapData = array.asInstanceOf[MapData]
    val numElements = mapData.numElements()
    val keys = mapData.keyArray()
    val values = mapData.valueArray()

    val ordering = if (ascending.asInstanceOf[Boolean]) {
      PhysicalDataType.ordering(keyType)
    } else {
      PhysicalDataType.ordering(keyType).reverse
    }

    val sortedMap = Array
      .tabulate(numElements)(i => (keys.get(i, keyType).asInstanceOf[Any],
        values.get(i, valueType).asInstanceOf[Any]))
      .sortBy(_._1)(ordering)

    new ArrayBasedMapData(new GenericArrayData(sortedMap.map(_._1)),
      new GenericArrayData(sortedMap.map(_._2)))
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    nullSafeCodeGen(ctx, ev, (b, order) => sortCodegen(ctx, ev, b, order))
  }

  /**
   * Builds the Java snippet that sorts the map for the code-generated path.
   *
   * The generated code copies every entry into a `java.util.AbstractMap.SimpleEntry`,
   * sorts the entries with `java.util.Arrays.sort` using a comparator over the keys
   * (negated when `order` is false), and rebuilds an `ArrayBasedMapData` from the result.
   *
   * @param base  name of the Java variable holding the input map.
   * @param order name of the Java variable holding the ascending flag.
   * @return Java source that assigns the sorted map to `ev.value`.
   */
  private def sortCodegen(ctx: CodegenContext, ev: ExprCode,
      base: String, order: String): String = {

    val arrayBasedMapData = classOf[ArrayBasedMapData].getName
    val genericArrayData = classOf[GenericArrayData].getName

    val numElements = ctx.freshName("numElements")
    val keys = ctx.freshName("keys")
    val values = ctx.freshName("values")
    val sortArray = ctx.freshName("sortArray")
    val i = ctx.freshName("i")
    val o1 = ctx.freshName("o1")
    val o1entry = ctx.freshName("o1entry")
    val o2 = ctx.freshName("o2")
    val o2entry = ctx.freshName("o2entry")
    val c = ctx.freshName("c")
    val newKeys = ctx.freshName("newKeys")
    val newValues = ctx.freshName("newValues")

    val boxedKeyType = CodeGenerator.boxedType(keyType)
    val boxedValueType = CodeGenerator.boxedType(valueType)
    val javaKeyType = CodeGenerator.javaType(keyType)

    val simpleEntryType = s"java.util.AbstractMap.SimpleEntry<$boxedKeyType, $boxedValueType>"

    // Keys come out of the entry as Object. Primitive key types are unboxed first so the
    // comparison is generated over primitive values; other types are only cast.
    val comp = if (CodeGenerator.isPrimitiveType(keyType)) {
      val v1 = ctx.freshName("v1")
      val v2 = ctx.freshName("v2")
      s"""
         |$javaKeyType $v1 = (($boxedKeyType) $o1).${javaKeyType}Value();
         |$javaKeyType $v2 = (($boxedKeyType) $o2).${javaKeyType}Value();
         |int $c = ${ctx.genComp(keyType, v1, v2)};
       """.stripMargin
    } else {
      s"int $c = ${ctx.genComp(keyType, s"(($javaKeyType) $o1)", s"(($javaKeyType) $o2)")};"
    }

    s"""
       |final int $numElements = $base.numElements();
       |ArrayData $keys = $base.keyArray();
       |ArrayData $values = $base.valueArray();
       |
       |Object[] $sortArray = new Object[$numElements];
       |
       |for (int $i = 0; $i < $numElements; $i++) {
       |  $sortArray[$i] = new $simpleEntryType(
       |    ${CodeGenerator.getValue(keys, keyType, i)},
       |    ${CodeGenerator.getValue(values, valueType, i)});
       |}
       |
       |java.util.Arrays.sort($sortArray, new java.util.Comparator<Object>() {
       |  @Override public int compare(Object $o1entry, Object $o2entry) {
       |    Object $o1 = (($simpleEntryType) $o1entry).getKey();
       |    Object $o2 = (($simpleEntryType) $o2entry).getKey();
       |    $comp;
       |    return $order ? $c : -$c;
       |  }
       |});
       |
       |Object[] $newKeys = new Object[$numElements];
       |Object[] $newValues = new Object[$numElements];
       |
       |for (int $i = 0; $i < $numElements; $i++) {
       |  $newKeys[$i] = (($simpleEntryType) $sortArray[$i]).getKey();
       |  $newValues[$i] = (($simpleEntryType) $sortArray[$i]).getValue();
       |}
       |
       |${ev.value} = new $arrayBasedMapData(
       |  new $genericArrayData($newKeys), new $genericArrayData($newValues));
       |""".stripMargin
  }

  override def prettyName: String = "map_sort"

  override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression)
    : MapSort = copy(base = newLeft, ascendingOrder = newRight)
}
|
|
||
| /** | ||
| * Common base class for [[SortArray]] and [[ArraySort]]. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't the `BinaryExpression` require two expressions here? Do we demote this to `UnaryExpression`?
EDIT: `Expression` for `ascendingOrder` in array sorting has been set as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What are the internal use cases for the expression? Do we need this parameter at all?
Seems like you are always going to pass `true` as `ascendingOrder` at
https://github.com/apache/spark/pull/45549/files#diff-11264d807efa58054cca2d220aae8fba644ee0f0f2a4722c46d52828394846efR2488
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From the point of view of internal use, we don't need it. Refactored the expression as a
`UnaryExpression` and removed ordering altogether.