
Commit a9f200e

gatorsmile authored and cloud-fan committed
[SPARK-25832][SQL][BRANCH-2.4] Revert newly added map related functions
## What changes were proposed in this pull request?

- Revert [SPARK-23935][SQL] Adding map_entries function: #21236
- Revert [SPARK-23937][SQL] Add map_filter SQL function: #21986
- Revert [SPARK-23940][SQL] Add transform_values SQL function: #22045
- Revert [SPARK-23939][SQL] Add transform_keys function: #22013
- Revert [SPARK-23938][SQL] Add map_zip_with function: #22017
- Revert the changes of map_entries in [SPARK-24331][SPARKR][SQL] Adding arrays_overlap, array_repeat, map_entries to SparkR: #21434

## How was this patch tested?

The existing tests.

Closes #22827 from gatorsmile/revertMap2.4.

Authored-by: gatorsmile <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
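For reference, a minimal sketch of the SQL surface being reverted here, assuming a build where these functions still exist (Spark master at the time; they later shipped in 3.0). None of these calls resolve on branch-2.4 after this commit, and the wrapper object name is purely illustrative:

```scala
import org.apache.spark.sql.SparkSession

object RevertedMapFunctionsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()

    // map_entries: map -> unordered array of (key, value) structs
    spark.sql("SELECT map_entries(map(1, 'a', 2, 'b'))").show(false)

    // map_filter / transform_keys / transform_values: higher-order functions over maps
    spark.sql("SELECT map_filter(map(1, 0, 2, 3), (k, v) -> v > 0)").show(false)
    spark.sql("SELECT transform_keys(map(1, 10, 2, 20), (k, v) -> k + 1)").show(false)
    spark.sql("SELECT transform_values(map(1, 10, 2, 20), (k, v) -> v * 2)").show(false)

    // map_zip_with: merge two maps key-wise with a three-argument lambda
    spark.sql("SELECT map_zip_with(map(1, 'a'), map(1, 'b'), (k, v1, v2) -> concat(v1, v2))")
      .show(false)

    spark.stop()
  }
}
```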
1 parent db121a2 commit a9f200e

File tree

17 files changed (+3, -1689 lines)


R/pkg/NAMESPACE

Lines changed: 0 additions & 1 deletion
@@ -313,7 +313,6 @@ exportMethods("%<=>%",
               "lower",
               "lpad",
               "ltrim",
-              "map_entries",
               "map_from_arrays",
               "map_keys",
               "map_values",

R/pkg/R/functions.R

Lines changed: 1 addition & 14 deletions
@@ -219,7 +219,7 @@ NULL
 #' head(select(tmp, sort_array(tmp$v1)))
 #' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
 #' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
-#' head(select(tmp3, map_entries(tmp3$v3), map_keys(tmp3$v3), map_values(tmp3$v3)))
+#' head(select(tmp3, map_keys(tmp3$v3), map_values(tmp3$v3)))
 #' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
 #' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp))
 #' head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5)))
@@ -3252,19 +3252,6 @@ setMethod("flatten",
             column(jc)
           })

-#' @details
-#' \code{map_entries}: Returns an unordered array of all entries in the given map.
-#'
-#' @rdname column_collection_functions
-#' @aliases map_entries map_entries,Column-method
-#' @note map_entries since 2.4.0
-setMethod("map_entries",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "map_entries", x@jc)
-            column(jc)
-          })
-
 #' @details
 #' \code{map_from_arrays}: Creates a new map column. The array in the first column is used for
 #' keys. The array in the second column is used for values. All elements in the array for key
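For contrast, map_from_arrays, which appears in the surviving context above, is untouched by this revert and remains available on branch-2.4. A minimal sketch (the wrapper object name is illustrative):

```scala
import org.apache.spark.sql.SparkSession

object MapFromArraysSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()
    // Pairs the first array as keys with the second array as values.
    spark.sql("SELECT map_from_arrays(array(1, 2), array('a', 'b')) AS m").show(false)
    spark.stop()
  }
}
```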

R/pkg/R/generics.R

Lines changed: 0 additions & 4 deletions
@@ -1076,10 +1076,6 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
 #' @name NULL
 setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") })

-#' @rdname column_collection_functions
-#' @name NULL
-setGeneric("map_entries", function(x) { standardGeneric("map_entries") })
-
 #' @rdname column_collection_functions
 #' @name NULL
 setGeneric("map_from_arrays", function(x, y) { standardGeneric("map_from_arrays") })

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 1 addition & 6 deletions
@@ -1570,13 +1570,8 @@ test_that("column functions", {
   result <- collect(select(df, flatten(df[[1]])))[[1]]
   expect_equal(result, list(list(1L, 2L, 3L, 4L), list(5L, 6L, 7L, 8L)))

-  # Test map_entries(), map_keys(), map_values() and element_at()
+  # Test map_keys(), map_values() and element_at()
   df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
-  result <- collect(select(df, map_entries(df$map)))[[1]]
-  expected_entries <- list(listToStruct(list(key = "x", value = 1)),
-                           listToStruct(list(key = "y", value = 2)))
-  expect_equal(result, list(expected_entries))
-
   result <- collect(select(df, map_keys(df$map)))[[1]]
   expect_equal(result, list(list("x", "y")))

python/pyspark/sql/functions.py

Lines changed: 0 additions & 20 deletions
@@ -2540,26 +2540,6 @@ def map_values(col):
     return Column(sc._jvm.functions.map_values(_to_java_column(col)))


-@since(2.4)
-def map_entries(col):
-    """
-    Collection function: Returns an unordered array of all entries in the given map.
-
-    :param col: name of column or expression
-
-    >>> from pyspark.sql.functions import map_entries
-    >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data")
-    >>> df.select(map_entries("data").alias("entries")).show()
-    +----------------+
-    |         entries|
-    +----------------+
-    |[[1, a], [2, b]]|
-    +----------------+
-    """
-    sc = SparkContext._active_spark_context
-    return Column(sc._jvm.functions.map_entries(_to_java_column(col)))
-
-
 @since(2.4)
 def map_from_entries(col):
     """

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 0 additions & 5 deletions
@@ -414,7 +414,6 @@ object FunctionRegistry {
     expression[MapFromArrays]("map_from_arrays"),
     expression[MapKeys]("map_keys"),
     expression[MapValues]("map_values"),
-    expression[MapEntries]("map_entries"),
     expression[MapFromEntries]("map_from_entries"),
     expression[MapConcat]("map_concat"),
     expression[Size]("size"),
@@ -433,13 +432,9 @@
     expression[ArrayRemove]("array_remove"),
     expression[ArrayDistinct]("array_distinct"),
     expression[ArrayTransform]("transform"),
-    expression[MapFilter]("map_filter"),
     expression[ArrayFilter]("filter"),
     expression[ArrayExists]("exists"),
     expression[ArrayAggregate]("aggregate"),
-    expression[TransformValues]("transform_values"),
-    expression[TransformKeys]("transform_keys"),
-    expression[MapZipWith]("map_zip_with"),
     expression[ZipWith]("zip_with"),

     CreateStruct.registryEntry,
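After this revert, a quick sanity check is to list which built-in map helpers are still registered. A sketch against a branch-2.4 build (the wrapper object name is illustrative):

```scala
import org.apache.spark.sql.SparkSession

object RegisteredMapFunctionsCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("check").getOrCreate()
    // Lists built-in functions whose names start with "map". On branch-2.4 after this
    // commit, map_keys, map_values, map_concat, map_from_arrays and map_from_entries
    // should appear, while map_entries, map_filter and map_zip_with should not.
    spark.sql("SHOW FUNCTIONS LIKE 'map*'").show(100, truncate = false)
    spark.stop()
  }
}
```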

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala

Lines changed: 0 additions & 25 deletions
@@ -54,7 +54,6 @@ object TypeCoercion {
       BooleanEquality ::
       FunctionArgumentConversion ::
       ConcatCoercion(conf) ::
-      MapZipWithCoercion ::
       EltCoercion(conf) ::
       CaseWhenCoercion ::
       IfCoercion ::
@@ -763,30 +762,6 @@
     }
   }

-  /**
-   * Coerces key types of two different [[MapType]] arguments of the [[MapZipWith]] expression
-   * to a common type.
-   */
-  object MapZipWithCoercion extends TypeCoercionRule {
-    override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = plan resolveExpressions {
-      // Lambda function isn't resolved when the rule is executed.
-      case m @ MapZipWith(left, right, function) if m.arguments.forall(a => a.resolved &&
-          MapType.acceptsType(a.dataType)) && !m.leftKeyType.sameType(m.rightKeyType) =>
-        findWiderTypeForTwo(m.leftKeyType, m.rightKeyType) match {
-          case Some(finalKeyType) if !Cast.forceNullable(m.leftKeyType, finalKeyType) &&
-              !Cast.forceNullable(m.rightKeyType, finalKeyType) =>
-            val newLeft = castIfNotSameType(
-              left,
-              MapType(finalKeyType, m.leftValueType, m.leftValueContainsNull))
-            val newRight = castIfNotSameType(
-              right,
-              MapType(finalKeyType, m.rightValueType, m.rightValueContainsNull))
-            MapZipWith(newLeft, newRight, function)
-          case _ => m
-        }
-    }
-  }
-
  /**
   * Coerces the types of [[Elt]] children to expected ones.
   *
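For context, the removed MapZipWithCoercion rule widened the key types of map_zip_with's two map arguments to a common type before resolution. A sketch of the kind of query it enabled, assuming a build where map_zip_with exists (not branch-2.4 after this revert); the wrapper object name is illustrative:

```scala
import org.apache.spark.sql.SparkSession

object MapZipWithCoercionSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()
    // The left map has int keys, the right map bigint keys; the coercion rule casts both
    // key types to bigint (the wider type) so MapZipWith can resolve.
    spark.sql(
      """SELECT map_zip_with(
        |  map(1, 'a'),
        |  map(CAST(1 AS BIGINT), 'b'),
        |  (k, v1, v2) -> concat(v1, v2)) AS zipped
      """.stripMargin).show(false)
    spark.stop()
  }
}
```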

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala

Lines changed: 0 additions & 168 deletions
@@ -340,174 +340,6 @@ case class MapValues(child: Expression)
   override def prettyName: String = "map_values"
 }

-/**
- * Returns an unordered array of all entries in the given map.
- */
-@ExpressionDescription(
-  usage = "_FUNC_(map) - Returns an unordered array of all entries in the given map.",
-  examples = """
-    Examples:
-      > SELECT _FUNC_(map(1, 'a', 2, 'b'));
-       [{"key":1,"value":"a"},{"key":2,"value":"b"}]
-  """,
-  since = "2.4.0")
-case class MapEntries(child: Expression) extends UnaryExpression with ExpectsInputTypes {
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(MapType)
-
-  @transient private lazy val childDataType: MapType = child.dataType.asInstanceOf[MapType]
-
-  override def dataType: DataType = {
-    ArrayType(
-      StructType(
-        StructField("key", childDataType.keyType, false) ::
-        StructField("value", childDataType.valueType, childDataType.valueContainsNull) ::
-        Nil),
-      false)
-  }
-
-  override protected def nullSafeEval(input: Any): Any = {
-    val childMap = input.asInstanceOf[MapData]
-    val keys = childMap.keyArray()
-    val values = childMap.valueArray()
-    val length = childMap.numElements()
-    val resultData = new Array[AnyRef](length)
-    var i = 0
-    while (i < length) {
-      val key = keys.get(i, childDataType.keyType)
-      val value = values.get(i, childDataType.valueType)
-      val row = new GenericInternalRow(Array[Any](key, value))
-      resultData.update(i, row)
-      i += 1
-    }
-    new GenericArrayData(resultData)
-  }
-
-  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    nullSafeCodeGen(ctx, ev, c => {
-      val arrayData = ctx.freshName("arrayData")
-      val numElements = ctx.freshName("numElements")
-      val keys = ctx.freshName("keys")
-      val values = ctx.freshName("values")
-      val isKeyPrimitive = CodeGenerator.isPrimitiveType(childDataType.keyType)
-      val isValuePrimitive = CodeGenerator.isPrimitiveType(childDataType.valueType)
-
-      val wordSize = UnsafeRow.WORD_SIZE
-      val structSize = UnsafeRow.calculateBitSetWidthInBytes(2) + wordSize * 2
-      val (isPrimitive, elementSize) = if (isKeyPrimitive && isValuePrimitive) {
-        (true, structSize + wordSize)
-      } else {
-        (false, -1)
-      }
-
-      val allocation =
-        s"""
-           |ArrayData $arrayData = ArrayData.allocateArrayData(
-           |  $elementSize, $numElements, " $prettyName failed.");
-         """.stripMargin
-
-      val code = if (isPrimitive) {
-        val genCodeForPrimitive = genCodeForPrimitiveElements(
-          ctx, arrayData, keys, values, ev.value, numElements, structSize)
-        s"""
-           |if ($arrayData instanceof UnsafeArrayData) {
-           |  $genCodeForPrimitive
-           |} else {
-           |  ${genCodeForAnyElements(ctx, arrayData, keys, values, ev.value, numElements)}
-           |}
-         """.stripMargin
-      } else {
-        s"${genCodeForAnyElements(ctx, arrayData, keys, values, ev.value, numElements)}"
-      }
-
-      s"""
-         |final int $numElements = $c.numElements();
-         |final ArrayData $keys = $c.keyArray();
-         |final ArrayData $values = $c.valueArray();
-         |$allocation
-         |$code
-       """.stripMargin
-    })
-  }
-
-  private def getKey(varName: String, index: String) =
-    CodeGenerator.getValue(varName, childDataType.keyType, index)
-
-  private def getValue(varName: String, index: String) =
-    CodeGenerator.getValue(varName, childDataType.valueType, index)
-
-  private def genCodeForPrimitiveElements(
-      ctx: CodegenContext,
-      arrayData: String,
-      keys: String,
-      values: String,
-      resultArrayData: String,
-      numElements: String,
-      structSize: Int): String = {
-    val unsafeArrayData = ctx.freshName("unsafeArrayData")
-    val baseObject = ctx.freshName("baseObject")
-    val unsafeRow = ctx.freshName("unsafeRow")
-    val structsOffset = ctx.freshName("structsOffset")
-    val offset = ctx.freshName("offset")
-    val z = ctx.freshName("z")
-    val calculateHeader = "UnsafeArrayData.calculateHeaderPortionInBytes"
-
-    val baseOffset = Platform.BYTE_ARRAY_OFFSET
-    val wordSize = UnsafeRow.WORD_SIZE
-    val structSizeAsLong = s"${structSize}L"
-
-    val setKey = CodeGenerator.setColumn(unsafeRow, childDataType.keyType, 0, getKey(keys, z))
-
-    val valueAssignmentChecked = CodeGenerator.createArrayAssignment(
-      unsafeRow, childDataType.valueType, values, "1", z, childDataType.valueContainsNull)
-
-    s"""
-       |UnsafeArrayData $unsafeArrayData = (UnsafeArrayData)$arrayData;
-       |Object $baseObject = $unsafeArrayData.getBaseObject();
-       |final int $structsOffset = $calculateHeader($numElements) + $numElements * $wordSize;
-       |UnsafeRow $unsafeRow = new UnsafeRow(2);
-       |for (int $z = 0; $z < $numElements; $z++) {
-       |  long $offset = $structsOffset + $z * $structSizeAsLong;
-       |  $unsafeArrayData.setLong($z, ($offset << 32) + $structSizeAsLong);
-       |  $unsafeRow.pointTo($baseObject, $baseOffset + $offset, $structSize);
-       |  $setKey;
-       |  $valueAssignmentChecked
-       |}
-       |$resultArrayData = $arrayData;
-     """.stripMargin
-  }
-
-  private def genCodeForAnyElements(
-      ctx: CodegenContext,
-      arrayData: String,
-      keys: String,
-      values: String,
-      resultArrayData: String,
-      numElements: String): String = {
-    val z = ctx.freshName("z")
-    val isValuePrimitive = CodeGenerator.isPrimitiveType(childDataType.valueType)
-    val getValueWithCheck = if (childDataType.valueContainsNull && isValuePrimitive) {
-      s"$values.isNullAt($z) ? null : (Object)${getValue(values, z)}"
-    } else {
-      getValue(values, z)
-    }
-
-    val rowClass = classOf[GenericInternalRow].getName
-    val genericArrayDataClass = classOf[GenericArrayData].getName
-    val genericArrayData = ctx.freshName("genericArrayData")
-    val rowObject = s"new $rowClass(new Object[]{${getKey(keys, z)}, $getValueWithCheck})"
-    s"""
-       |$genericArrayDataClass $genericArrayData = ($genericArrayDataClass)$arrayData;
-       |for (int $z = 0; $z < $numElements; $z++) {
-       |  $genericArrayData.update($z, $rowObject);
-       |}
-       |$resultArrayData = $arrayData;
-     """.stripMargin
-  }
-
-  override def prettyName: String = "map_entries"
-}
-
 /**
  * Returns the union of all the given maps.
  */
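With the MapEntries expression gone from branch-2.4, a rough substitute (a sketch, not part of this change; the wrapper object name is illustrative) is to explode the map into key/value rows and, if an array of structs is needed, re-collect them:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{collect_list, explode, struct}

object MapEntriesWorkaround {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("workaround").getOrCreate()

    val df = spark.sql("SELECT map(1, 'a', 2, 'b') AS data")

    // explode on a map column yields one (key, value) row per entry; this works on 2.4.
    val entries = df.select(explode(df("data")))
    entries.show()

    // Re-assemble an array of structs, similar in shape to what map_entries returned
    // (ordering is not guaranteed).
    entries.select(collect_list(struct("key", "value")).as("entries")).show(false)

    spark.stop()
  }
}
```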
