Skip to content

Commit e3fec1e

Browse files
author
Olivier Girardot
committed
SPARK-7118 Make the coalesce Spark SQL function available in PySpark
No changes to the Scala/Java side; only the Python API is changed.
1 parent 8509519 commit e3fec1e

File tree

1 file changed

+21
-1
lines changed

1 file changed

+21
-1
lines changed

python/pyspark/sql/functions.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from pyspark.sql.dataframe import Column, _to_java_column, _to_seq
3131

3232

33-
__all__ = ['countDistinct', 'approxCountDistinct', 'udf']
33+
__all__ = ['countDistinct', 'approxCountDistinct', 'udf', 'coalesce']
3434

3535

3636
def _create_function(name, doc=""):
@@ -75,6 +75,26 @@ def _(col):
7575
__all__.sort()
7676

7777

78+
def coalesce(*cols):
    """Returns a new :class:`Column` holding the first of ``cols`` that is not null.

    Any number of columns may be passed; they are forwarded together to the
    JVM-side ``functions.coalesce`` and the result is wrapped in a
    :class:`Column`.

    >>> df.select(coalesce(df["a"], df["b"])).show()
    Coalesce(a,b)
    1
    3
    5

    >>> df.select('*', coalesce(df["a"], lit(0.0))).show()
    a b Coalesce(a,0.0)
    1 2 1.0
    null 3 0.0
    5 null 5.0
    """
    active_ctx = SparkContext._active_spark_context
    # Resolve the JVM function first, then build its argument: every entry of
    # ``cols`` goes through ``_to_seq`` with ``_to_java_column`` as converter.
    jvm_coalesce = active_ctx._jvm.functions.coalesce
    return Column(jvm_coalesce(_to_seq(active_ctx, cols, _to_java_column)))
96+
97+
7898
def countDistinct(col, *cols):
7999
"""Returns a new :class:`Column` for distinct count of ``col`` or ``cols``.
80100

0 commit comments

Comments
 (0)