192 changes: 154 additions & 38 deletions python/pyspark/sql/functions/builtin.py
@@ -15800,7 +15800,7 @@ def map_contains_key(col: "ColumnOrName", value: Any) -> Column:
@_try_remote_functions
def map_keys(col: "ColumnOrName") -> Column:
"""
Collection function: Returns an unordered array containing the keys of the map.
Map function: Returns an unordered array containing the keys of the map.

.. versionadded:: 2.3.0

@@ -15810,31 +15810,69 @@ def map_keys(col: "ColumnOrName") -> Column:
Parameters
----------
col : :class:`~pyspark.sql.Column` or str
name of column or expression
Name of column or expression

Returns
-------
:class:`~pyspark.sql.Column`
keys of the map as an array.
Keys of the map as an array.

Examples
--------
>>> from pyspark.sql.functions import map_keys
Example 1: Extracting keys from a simple map

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data")
>>> df.select(map_keys("data").alias("keys")).show()
+------+
| keys|
+------+
|[1, 2]|
+------+
>>> df.select(sf.sort_array(sf.map_keys("data"))).show()
+--------------------------------+
|sort_array(map_keys(data), true)|
+--------------------------------+
| [1, 2]|
+--------------------------------+

Example 2: Extracting keys from a map with complex keys

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map(array(1, 2), 'a', array(3, 4), 'b') as data")
>>> df.select(sf.sort_array(sf.map_keys("data"))).show()
+--------------------------------+
|sort_array(map_keys(data), true)|
+--------------------------------+
| [[1, 2], [3, 4]]|
+--------------------------------+

Example 3: Extracting keys from a map with duplicate keys

>>> from pyspark.sql import functions as sf
>>> originalMapKeyDedupPolicy = spark.conf.get("spark.sql.mapKeyDedupPolicy")
>>> spark.conf.set("spark.sql.mapKeyDedupPolicy", "LAST_WIN")
>>> df = spark.sql("SELECT map(1, 'a', 1, 'b') as data")
>>> df.select(sf.map_keys("data")).show()
+--------------+
|map_keys(data)|
+--------------+
| [1]|
+--------------+
>>> spark.conf.set("spark.sql.mapKeyDedupPolicy", originalMapKeyDedupPolicy)

Example 4: Extracting keys from an empty map

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map() as data")
>>> df.select(sf.map_keys("data")).show()
+--------------+
|map_keys(data)|
+--------------+
| []|
+--------------+
"""
return _invoke_function_over_columns("map_keys", col)


@_try_remote_functions
def map_values(col: "ColumnOrName") -> Column:
"""
Collection function: Returns an unordered array containing the values of the map.
Map function: Returns an unordered array containing the values of the map.

.. versionadded:: 2.3.0

@@ -15844,64 +15882,142 @@ def map_values(col: "ColumnOrName") -> Column:
Parameters
----------
col : :class:`~pyspark.sql.Column` or str
name of column or expression
Name of column or expression

Returns
-------
:class:`~pyspark.sql.Column`
values of the map as an array.
Values of the map as an array.

Examples
--------
>>> from pyspark.sql.functions import map_values
Example 1: Extracting values from a simple map

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data")
>>> df.select(map_values("data").alias("values")).show()
+------+
|values|
+------+
|[a, b]|
+------+
>>> df.select(sf.sort_array(sf.map_values("data"))).show()
+----------------------------------+
|sort_array(map_values(data), true)|
+----------------------------------+
| [a, b]|
+----------------------------------+

Example 2: Extracting values from a map with complex values

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map(1, array('a', 'b'), 2, array('c', 'd')) as data")
>>> df.select(sf.sort_array(sf.map_values("data"))).show()
+----------------------------------+
|sort_array(map_values(data), true)|
+----------------------------------+
| [[a, b], [c, d]]|
+----------------------------------+

Example 3: Extracting values from a map with null values

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map(1, null, 2, 'b') as data")
>>> df.select(sf.sort_array(sf.map_values("data"))).show()
+----------------------------------+
|sort_array(map_values(data), true)|
+----------------------------------+
| [NULL, b]|
+----------------------------------+

Example 4: Extracting values from a map with duplicate values

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map(1, 'a', 2, 'a') as data")
>>> df.select(sf.map_values("data")).show()
+----------------+
|map_values(data)|
+----------------+
| [a, a]|
+----------------+

Example 5: Extracting values from an empty map

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map() as data")
>>> df.select(sf.map_values("data")).show()
+----------------+
|map_values(data)|
+----------------+
| []|
+----------------+
"""
return _invoke_function_over_columns("map_values", col)


@_try_remote_functions
def map_entries(col: "ColumnOrName") -> Column:
"""
Collection function: Returns an unordered array of all entries in the given map.
Map function: Returns an unordered array of all entries in the given map.

.. versionadded:: 3.0.0

.. versionchanged:: 3.4.0
Supports Spark Connect.

Parameters
----------
col : :class:`~pyspark.sql.Column` or str
name of column or expression
Name of column or expression

Returns
-------
:class:`~pyspark.sql.Column`
an array of key value pairs as a struct type
An array of key-value pairs as a struct type.

Examples
--------
>>> from pyspark.sql.functions import map_entries
Example 1: Extracting entries from a simple map

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data")
>>> df = df.select(map_entries("data").alias("entries"))
>>> df.show()
+----------------+
| entries|
+----------------+
|[{1, a}, {2, b}]|
+----------------+
>>> df.printSchema()
root
|-- entries: array (nullable = false)
| |-- element: struct (containsNull = false)
| | |-- key: integer (nullable = false)
| | |-- value: string (nullable = false)
>>> df.select(sf.sort_array(sf.map_entries("data"))).show()
+-----------------------------------+
|sort_array(map_entries(data), true)|
+-----------------------------------+
| [{1, a}, {2, b}]|
+-----------------------------------+

Example 2: Extracting entries from a map with complex keys and values

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map(array(1, 2), array('a', 'b'), "
... "array(3, 4), array('c', 'd')) as data")
>>> df.select(sf.sort_array(sf.map_entries("data"))).show(truncate=False)
+------------------------------------+
|sort_array(map_entries(data), true) |
+------------------------------------+
|[{[1, 2], [a, b]}, {[3, 4], [c, d]}]|
+------------------------------------+

Example 3: Extracting entries from a map with duplicate keys

>>> from pyspark.sql import functions as sf
>>> originalMapKeyDedupPolicy = spark.conf.get("spark.sql.mapKeyDedupPolicy")
>>> spark.conf.set("spark.sql.mapKeyDedupPolicy", "LAST_WIN")
>>> df = spark.sql("SELECT map(1, 'a', 1, 'b') as data")
>>> df.select(sf.map_entries("data")).show()
+-----------------+
|map_entries(data)|
+-----------------+
| [{1, b}]|
+-----------------+
>>> spark.conf.set("spark.sql.mapKeyDedupPolicy", originalMapKeyDedupPolicy)

Example 4: Extracting entries from an empty map

>>> from pyspark.sql import functions as sf
>>> df = spark.sql("SELECT map() as data")
>>> df.select(sf.map_entries("data")).show()
+-----------------+
|map_entries(data)|
+-----------------+
| []|
+-----------------+
"""
return _invoke_function_over_columns("map_entries", col)
